From 2c77b1ba39ec43cb04d50340a7cff5c00f12dbdb Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Fri, 29 May 2026 00:58:20 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 Source: Original Platform --- .gitattributes | 36 + README.md | 86 + all_results.json | 28 + config.json | 29 + eval_results.json | 22 + generation_config.json | 9 + margin_logs/margins.jsonl | 681 ++ margin_logs/step_0000001.npy | 3 + margin_logs/step_0000002.npy | 3 + margin_logs/step_0000003.npy | 3 + margin_logs/step_0000004.npy | 3 + margin_logs/step_0000005.npy | 3 + margin_logs/step_0000006.npy | 3 + margin_logs/step_0000007.npy | 3 + margin_logs/step_0000008.npy | 3 + margin_logs/step_0000009.npy | 3 + margin_logs/step_0000010.npy | 3 + margin_logs/step_0000011.npy | 3 + margin_logs/step_0000012.npy | 3 + margin_logs/step_0000013.npy | 3 + margin_logs/step_0000014.npy | 3 + margin_logs/step_0000015.npy | 3 + margin_logs/step_0000016.npy | 3 + margin_logs/step_0000017.npy | 3 + margin_logs/step_0000018.npy | 3 + margin_logs/step_0000019.npy | 3 + margin_logs/step_0000020.npy | 3 + margin_logs/step_0000021.npy | 3 + margin_logs/step_0000022.npy | 3 + margin_logs/step_0000023.npy | 3 + margin_logs/step_0000024.npy | 3 + margin_logs/step_0000025.npy | 3 + margin_logs/step_0000026.npy | 3 + margin_logs/step_0000027.npy | 3 + margin_logs/step_0000028.npy | 3 + margin_logs/step_0000029.npy | 3 + margin_logs/step_0000030.npy | 3 + margin_logs/step_0000031.npy | 3 + margin_logs/step_0000032.npy | 3 + margin_logs/step_0000033.npy | 3 + margin_logs/step_0000034.npy | 3 + margin_logs/step_0000035.npy | 3 + margin_logs/step_0000036.npy | 3 + margin_logs/step_0000037.npy | 3 + margin_logs/step_0000038.npy | 3 + margin_logs/step_0000039.npy | 3 + margin_logs/step_0000040.npy | 3 + margin_logs/step_0000041.npy | 3 + margin_logs/step_0000042.npy | 3 + margin_logs/step_0000043.npy | 3 + margin_logs/step_0000044.npy | 3 + margin_logs/step_0000045.npy | 3 + margin_logs/step_0000046.npy | 3 + margin_logs/step_0000047.npy | 3 + margin_logs/step_0000048.npy | 3 + margin_logs/step_0000049.npy | 3 + margin_logs/step_0000050.npy | 3 + margin_logs/step_0000051.npy | 3 + margin_logs/step_0000052.npy | 3 + margin_logs/step_0000053.npy | 3 + margin_logs/step_0000054.npy | 3 + margin_logs/step_0000055.npy | 3 + margin_logs/step_0000056.npy | 3 + margin_logs/step_0000057.npy | 3 + margin_logs/step_0000058.npy | 3 + margin_logs/step_0000059.npy | 3 + margin_logs/step_0000060.npy | 3 + margin_logs/step_0000061.npy | 3 + margin_logs/step_0000062.npy | 3 + margin_logs/step_0000063.npy | 3 + margin_logs/step_0000064.npy | 3 + margin_logs/step_0000065.npy | 3 + margin_logs/step_0000066.npy | 3 + margin_logs/step_0000067.npy | 3 + margin_logs/step_0000068.npy | 3 + margin_logs/step_0000069.npy | 3 + margin_logs/step_0000070.npy | 3 + margin_logs/step_0000071.npy | 3 + margin_logs/step_0000072.npy | 3 + margin_logs/step_0000073.npy | 3 + margin_logs/step_0000074.npy | 3 + margin_logs/step_0000075.npy | 3 + margin_logs/step_0000076.npy | 3 + margin_logs/step_0000077.npy | 3 + margin_logs/step_0000078.npy | 3 + margin_logs/step_0000079.npy | 3 + margin_logs/step_0000080.npy | 3 + margin_logs/step_0000081.npy | 3 + margin_logs/step_0000082.npy | 3 + margin_logs/step_0000083.npy | 3 + margin_logs/step_0000084.npy | 3 + margin_logs/step_0000085.npy | 3 + margin_logs/step_0000086.npy | 3 + margin_logs/step_0000087.npy | 3 + margin_logs/step_0000088.npy | 3 + margin_logs/step_0000089.npy | 3 + margin_logs/step_0000090.npy | 3 + margin_logs/step_0000091.npy | 3 + margin_logs/step_0000092.npy | 3 + margin_logs/step_0000093.npy | 3 + margin_logs/step_0000094.npy | 3 + margin_logs/step_0000095.npy | 3 + margin_logs/step_0000096.npy | 3 + margin_logs/step_0000097.npy | 3 + margin_logs/step_0000098.npy | 3 + margin_logs/step_0000099.npy | 3 + margin_logs/step_0000100.npy | 3 + margin_logs/step_0000101.npy | 3 + margin_logs/step_0000102.npy | 3 + margin_logs/step_0000103.npy | 3 + margin_logs/step_0000104.npy | 3 + margin_logs/step_0000105.npy | 3 + margin_logs/step_0000106.npy | 3 + margin_logs/step_0000107.npy | 3 + margin_logs/step_0000108.npy | 3 + margin_logs/step_0000109.npy | 3 + margin_logs/step_0000110.npy | 3 + margin_logs/step_0000111.npy | 3 + margin_logs/step_0000112.npy | 3 + margin_logs/step_0000113.npy | 3 + margin_logs/step_0000114.npy | 3 + margin_logs/step_0000115.npy | 3 + margin_logs/step_0000116.npy | 3 + margin_logs/step_0000117.npy | 3 + margin_logs/step_0000118.npy | 3 + margin_logs/step_0000119.npy | 3 + margin_logs/step_0000120.npy | 3 + margin_logs/step_0000121.npy | 3 + margin_logs/step_0000122.npy | 3 + margin_logs/step_0000123.npy | 3 + margin_logs/step_0000124.npy | 3 + margin_logs/step_0000125.npy | 3 + margin_logs/step_0000126.npy | 3 + margin_logs/step_0000127.npy | 3 + margin_logs/step_0000128.npy | 3 + margin_logs/step_0000129.npy | 3 + margin_logs/step_0000130.npy | 3 + margin_logs/step_0000131.npy | 3 + margin_logs/step_0000132.npy | 3 + margin_logs/step_0000133.npy | 3 + margin_logs/step_0000134.npy | 3 + margin_logs/step_0000135.npy | 3 + margin_logs/step_0000136.npy | 3 + margin_logs/step_0000137.npy | 3 + margin_logs/step_0000138.npy | 3 + margin_logs/step_0000139.npy | 3 + margin_logs/step_0000140.npy | 3 + margin_logs/step_0000141.npy | 3 + margin_logs/step_0000142.npy | 3 + margin_logs/step_0000143.npy | 3 + margin_logs/step_0000144.npy | 3 + margin_logs/step_0000145.npy | 3 + margin_logs/step_0000146.npy | 3 + margin_logs/step_0000147.npy | 3 + margin_logs/step_0000148.npy | 3 + margin_logs/step_0000149.npy | 3 + margin_logs/step_0000150.npy | 3 + margin_logs/step_0000151.npy | 3 + margin_logs/step_0000152.npy | 3 + margin_logs/step_0000153.npy | 3 + margin_logs/step_0000154.npy | 3 + margin_logs/step_0000155.npy | 3 + margin_logs/step_0000156.npy | 3 + margin_logs/step_0000157.npy | 3 + margin_logs/step_0000158.npy | 3 + margin_logs/step_0000159.npy | 3 + margin_logs/step_0000160.npy | 3 + margin_logs/step_0000161.npy | 3 + margin_logs/step_0000162.npy | 3 + margin_logs/step_0000163.npy | 3 + margin_logs/step_0000164.npy | 3 + margin_logs/step_0000165.npy | 3 + margin_logs/step_0000166.npy | 3 + margin_logs/step_0000167.npy | 3 + margin_logs/step_0000168.npy | 3 + margin_logs/step_0000169.npy | 3 + margin_logs/step_0000170.npy | 3 + margin_logs/step_0000171.npy | 3 + margin_logs/step_0000172.npy | 3 + margin_logs/step_0000173.npy | 3 + margin_logs/step_0000174.npy | 3 + margin_logs/step_0000175.npy | 3 + margin_logs/step_0000176.npy | 3 + margin_logs/step_0000177.npy | 3 + margin_logs/step_0000178.npy | 3 + margin_logs/step_0000179.npy | 3 + margin_logs/step_0000180.npy | 3 + margin_logs/step_0000181.npy | 3 + margin_logs/step_0000182.npy | 3 + margin_logs/step_0000183.npy | 3 + margin_logs/step_0000184.npy | 3 + margin_logs/step_0000185.npy | 3 + margin_logs/step_0000186.npy | 3 + margin_logs/step_0000187.npy | 3 + margin_logs/step_0000188.npy | 3 + margin_logs/step_0000189.npy | 3 + margin_logs/step_0000190.npy | 3 + margin_logs/step_0000191.npy | 3 + margin_logs/step_0000192.npy | 3 + margin_logs/step_0000193.npy | 3 + margin_logs/step_0000194.npy | 3 + margin_logs/step_0000195.npy | 3 + margin_logs/step_0000196.npy | 3 + margin_logs/step_0000197.npy | 3 + margin_logs/step_0000198.npy | 3 + margin_logs/step_0000199.npy | 3 + margin_logs/step_0000200.npy | 3 + margin_logs/step_0000201.npy | 3 + margin_logs/step_0000202.npy | 3 + margin_logs/step_0000203.npy | 3 + margin_logs/step_0000204.npy | 3 + margin_logs/step_0000205.npy | 3 + margin_logs/step_0000206.npy | 3 + margin_logs/step_0000207.npy | 3 + margin_logs/step_0000208.npy | 3 + margin_logs/step_0000209.npy | 3 + margin_logs/step_0000210.npy | 3 + margin_logs/step_0000211.npy | 3 + margin_logs/step_0000212.npy | 3 + margin_logs/step_0000213.npy | 3 + margin_logs/step_0000214.npy | 3 + margin_logs/step_0000215.npy | 3 + margin_logs/step_0000216.npy | 3 + margin_logs/step_0000217.npy | 3 + margin_logs/step_0000218.npy | 3 + margin_logs/step_0000219.npy | 3 + margin_logs/step_0000220.npy | 3 + margin_logs/step_0000221.npy | 3 + margin_logs/step_0000222.npy | 3 + margin_logs/step_0000223.npy | 3 + margin_logs/step_0000224.npy | 3 + margin_logs/step_0000225.npy | 3 + margin_logs/step_0000226.npy | 3 + margin_logs/step_0000227.npy | 3 + margin_logs/step_0000228.npy | 3 + margin_logs/step_0000229.npy | 3 + margin_logs/step_0000230.npy | 3 + margin_logs/step_0000231.npy | 3 + margin_logs/step_0000232.npy | 3 + margin_logs/step_0000233.npy | 3 + margin_logs/step_0000234.npy | 3 + margin_logs/step_0000235.npy | 3 + margin_logs/step_0000236.npy | 3 + margin_logs/step_0000237.npy | 3 + margin_logs/step_0000238.npy | 3 + margin_logs/step_0000239.npy | 3 + margin_logs/step_0000240.npy | 3 + margin_logs/step_0000241.npy | 3 + margin_logs/step_0000242.npy | 3 + margin_logs/step_0000243.npy | 3 + margin_logs/step_0000244.npy | 3 + margin_logs/step_0000245.npy | 3 + margin_logs/step_0000246.npy | 3 + margin_logs/step_0000247.npy | 3 + margin_logs/step_0000248.npy | 3 + margin_logs/step_0000249.npy | 3 + margin_logs/step_0000250.npy | 3 + margin_logs/step_0000251.npy | 3 + margin_logs/step_0000252.npy | 3 + margin_logs/step_0000253.npy | 3 + margin_logs/step_0000254.npy | 3 + margin_logs/step_0000255.npy | 3 + margin_logs/step_0000256.npy | 3 + margin_logs/step_0000257.npy | 3 + margin_logs/step_0000258.npy | 3 + margin_logs/step_0000259.npy | 3 + margin_logs/step_0000260.npy | 3 + margin_logs/step_0000261.npy | 3 + margin_logs/step_0000262.npy | 3 + margin_logs/step_0000263.npy | 3 + margin_logs/step_0000264.npy | 3 + margin_logs/step_0000265.npy | 3 + margin_logs/step_0000266.npy | 3 + margin_logs/step_0000267.npy | 3 + margin_logs/step_0000268.npy | 3 + margin_logs/step_0000269.npy | 3 + margin_logs/step_0000270.npy | 3 + margin_logs/step_0000271.npy | 3 + margin_logs/step_0000272.npy | 3 + margin_logs/step_0000273.npy | 3 + margin_logs/step_0000274.npy | 3 + margin_logs/step_0000275.npy | 3 + margin_logs/step_0000276.npy | 3 + margin_logs/step_0000277.npy | 3 + margin_logs/step_0000278.npy | 3 + margin_logs/step_0000279.npy | 3 + margin_logs/step_0000280.npy | 3 + margin_logs/step_0000281.npy | 3 + margin_logs/step_0000282.npy | 3 + margin_logs/step_0000283.npy | 3 + margin_logs/step_0000284.npy | 3 + margin_logs/step_0000285.npy | 3 + margin_logs/step_0000286.npy | 3 + margin_logs/step_0000287.npy | 3 + margin_logs/step_0000288.npy | 3 + margin_logs/step_0000289.npy | 3 + margin_logs/step_0000290.npy | 3 + margin_logs/step_0000291.npy | 3 + margin_logs/step_0000292.npy | 3 + margin_logs/step_0000293.npy | 3 + margin_logs/step_0000294.npy | 3 + margin_logs/step_0000295.npy | 3 + margin_logs/step_0000296.npy | 3 + margin_logs/step_0000297.npy | 3 + margin_logs/step_0000298.npy | 3 + margin_logs/step_0000299.npy | 3 + margin_logs/step_0000300.npy | 3 + margin_logs/step_0000301.npy | 3 + margin_logs/step_0000302.npy | 3 + margin_logs/step_0000303.npy | 3 + margin_logs/step_0000304.npy | 3 + margin_logs/step_0000305.npy | 3 + margin_logs/step_0000306.npy | 3 + margin_logs/step_0000307.npy | 3 + margin_logs/step_0000308.npy | 3 + margin_logs/step_0000309.npy | 3 + margin_logs/step_0000310.npy | 3 + margin_logs/step_0000311.npy | 3 + margin_logs/step_0000312.npy | 3 + margin_logs/step_0000313.npy | 3 + margin_logs/step_0000314.npy | 3 + margin_logs/step_0000315.npy | 3 + margin_logs/step_0000316.npy | 3 + margin_logs/step_0000317.npy | 3 + margin_logs/step_0000318.npy | 3 + margin_logs/step_0000319.npy | 3 + margin_logs/step_0000320.npy | 3 + margin_logs/step_0000321.npy | 3 + margin_logs/step_0000322.npy | 3 + margin_logs/step_0000323.npy | 3 + margin_logs/step_0000324.npy | 3 + margin_logs/step_0000325.npy | 3 + margin_logs/step_0000326.npy | 3 + margin_logs/step_0000327.npy | 3 + margin_logs/step_0000328.npy | 3 + margin_logs/step_0000329.npy | 3 + margin_logs/step_0000330.npy | 3 + margin_logs/step_0000331.npy | 3 + margin_logs/step_0000332.npy | 3 + margin_logs/step_0000333.npy | 3 + margin_logs/step_0000334.npy | 3 + margin_logs/step_0000335.npy | 3 + margin_logs/step_0000336.npy | 3 + margin_logs/step_0000337.npy | 3 + margin_logs/step_0000338.npy | 3 + margin_logs/step_0000339.npy | 3 + margin_logs/step_0000340.npy | 3 + margin_logs/step_0000341.npy | 3 + margin_logs/step_0000342.npy | 3 + margin_logs/step_0000343.npy | 3 + margin_logs/step_0000344.npy | 3 + margin_logs/step_0000345.npy | 3 + margin_logs/step_0000346.npy | 3 + margin_logs/step_0000347.npy | 3 + margin_logs/step_0000348.npy | 3 + margin_logs/step_0000349.npy | 3 + margin_logs/step_0000350.npy | 3 + margin_logs/step_0000351.npy | 3 + margin_logs/step_0000352.npy | 3 + margin_logs/step_0000353.npy | 3 + margin_logs/step_0000354.npy | 3 + margin_logs/step_0000355.npy | 3 + margin_logs/step_0000356.npy | 3 + margin_logs/step_0000357.npy | 3 + margin_logs/step_0000358.npy | 3 + margin_logs/step_0000359.npy | 3 + margin_logs/step_0000360.npy | 3 + margin_logs/step_0000361.npy | 3 + margin_logs/step_0000362.npy | 3 + margin_logs/step_0000363.npy | 3 + margin_logs/step_0000364.npy | 3 + margin_logs/step_0000365.npy | 3 + margin_logs/step_0000366.npy | 3 + margin_logs/step_0000367.npy | 3 + margin_logs/step_0000368.npy | 3 + margin_logs/step_0000369.npy | 3 + margin_logs/step_0000370.npy | 3 + margin_logs/step_0000371.npy | 3 + margin_logs/step_0000372.npy | 3 + margin_logs/step_0000373.npy | 3 + margin_logs/step_0000374.npy | 3 + margin_logs/step_0000375.npy | 3 + margin_logs/step_0000376.npy | 3 + margin_logs/step_0000377.npy | 3 + margin_logs/step_0000378.npy | 3 + margin_logs/step_0000379.npy | 3 + margin_logs/step_0000380.npy | 3 + margin_logs/step_0000381.npy | 3 + margin_logs/step_0000382.npy | 3 + margin_logs/step_0000383.npy | 3 + margin_logs/step_0000384.npy | 3 + margin_logs/step_0000385.npy | 3 + margin_logs/step_0000386.npy | 3 + margin_logs/step_0000387.npy | 3 + margin_logs/step_0000388.npy | 3 + margin_logs/step_0000389.npy | 3 + margin_logs/step_0000390.npy | 3 + margin_logs/step_0000391.npy | 3 + margin_logs/step_0000392.npy | 3 + margin_logs/step_0000393.npy | 3 + margin_logs/step_0000394.npy | 3 + margin_logs/step_0000395.npy | 3 + margin_logs/step_0000396.npy | 3 + margin_logs/step_0000397.npy | 3 + margin_logs/step_0000398.npy | 3 + margin_logs/step_0000399.npy | 3 + margin_logs/step_0000400.npy | 3 + margin_logs/step_0000401.npy | 3 + margin_logs/step_0000402.npy | 3 + margin_logs/step_0000403.npy | 3 + margin_logs/step_0000404.npy | 3 + margin_logs/step_0000405.npy | 3 + margin_logs/step_0000406.npy | 3 + margin_logs/step_0000407.npy | 3 + margin_logs/step_0000408.npy | 3 + margin_logs/step_0000409.npy | 3 + margin_logs/step_0000410.npy | 3 + margin_logs/step_0000411.npy | 3 + margin_logs/step_0000412.npy | 3 + margin_logs/step_0000413.npy | 3 + margin_logs/step_0000414.npy | 3 + margin_logs/step_0000415.npy | 3 + margin_logs/step_0000416.npy | 3 + margin_logs/step_0000417.npy | 3 + margin_logs/step_0000418.npy | 3 + margin_logs/step_0000419.npy | 3 + margin_logs/step_0000420.npy | 3 + margin_logs/step_0000421.npy | 3 + margin_logs/step_0000422.npy | 3 + margin_logs/step_0000423.npy | 3 + margin_logs/step_0000424.npy | 3 + margin_logs/step_0000425.npy | 3 + margin_logs/step_0000426.npy | 3 + margin_logs/step_0000427.npy | 3 + margin_logs/step_0000428.npy | 3 + margin_logs/step_0000429.npy | 3 + margin_logs/step_0000430.npy | 3 + margin_logs/step_0000431.npy | 3 + margin_logs/step_0000432.npy | 3 + margin_logs/step_0000433.npy | 3 + margin_logs/step_0000434.npy | 3 + margin_logs/step_0000435.npy | 3 + margin_logs/step_0000436.npy | 3 + margin_logs/step_0000437.npy | 3 + margin_logs/step_0000438.npy | 3 + margin_logs/step_0000439.npy | 3 + margin_logs/step_0000440.npy | 3 + margin_logs/step_0000441.npy | 3 + margin_logs/step_0000442.npy | 3 + margin_logs/step_0000443.npy | 3 + margin_logs/step_0000444.npy | 3 + margin_logs/step_0000445.npy | 3 + margin_logs/step_0000446.npy | 3 + margin_logs/step_0000447.npy | 3 + margin_logs/step_0000448.npy | 3 + margin_logs/step_0000449.npy | 3 + margin_logs/step_0000450.npy | 3 + margin_logs/step_0000451.npy | 3 + margin_logs/step_0000452.npy | 3 + margin_logs/step_0000453.npy | 3 + margin_logs/step_0000454.npy | 3 + margin_logs/step_0000455.npy | 3 + margin_logs/step_0000456.npy | 3 + margin_logs/step_0000457.npy | 3 + margin_logs/step_0000458.npy | 3 + margin_logs/step_0000459.npy | 3 + margin_logs/step_0000460.npy | 3 + margin_logs/step_0000461.npy | 3 + margin_logs/step_0000462.npy | 3 + margin_logs/step_0000463.npy | 3 + margin_logs/step_0000464.npy | 3 + margin_logs/step_0000465.npy | 3 + margin_logs/step_0000466.npy | 3 + margin_logs/step_0000467.npy | 3 + margin_logs/step_0000468.npy | 3 + margin_logs/step_0000469.npy | 3 + margin_logs/step_0000470.npy | 3 + margin_logs/step_0000471.npy | 3 + margin_logs/step_0000472.npy | 3 + margin_logs/step_0000473.npy | 3 + margin_logs/step_0000474.npy | 3 + margin_logs/step_0000475.npy | 3 + margin_logs/step_0000476.npy | 3 + margin_logs/step_0000477.npy | 3 + margin_logs/step_0000478.npy | 3 + margin_logs/step_0000479.npy | 3 + margin_logs/step_0000480.npy | 3 + margin_logs/step_0000481.npy | 3 + margin_logs/step_0000482.npy | 3 + margin_logs/step_0000483.npy | 3 + margin_logs/step_0000484.npy | 3 + margin_logs/step_0000485.npy | 3 + margin_logs/step_0000486.npy | 3 + margin_logs/step_0000487.npy | 3 + margin_logs/step_0000488.npy | 3 + margin_logs/step_0000489.npy | 3 + margin_logs/step_0000490.npy | 3 + margin_logs/step_0000491.npy | 3 + margin_logs/step_0000492.npy | 3 + margin_logs/step_0000493.npy | 3 + margin_logs/step_0000494.npy | 3 + margin_logs/step_0000495.npy | 3 + margin_logs/step_0000496.npy | 3 + margin_logs/step_0000497.npy | 3 + margin_logs/step_0000498.npy | 3 + margin_logs/step_0000499.npy | 3 + margin_logs/step_0000500.npy | 3 + margin_logs/step_0000501.npy | 3 + margin_logs/step_0000502.npy | 3 + margin_logs/step_0000503.npy | 3 + margin_logs/step_0000504.npy | 3 + margin_logs/step_0000505.npy | 3 + margin_logs/step_0000506.npy | 3 + margin_logs/step_0000507.npy | 3 + margin_logs/step_0000508.npy | 3 + margin_logs/step_0000509.npy | 3 + margin_logs/step_0000510.npy | 3 + margin_logs/step_0000511.npy | 3 + margin_logs/step_0000512.npy | 3 + margin_logs/step_0000513.npy | 3 + margin_logs/step_0000514.npy | 3 + margin_logs/step_0000515.npy | 3 + margin_logs/step_0000516.npy | 3 + margin_logs/step_0000517.npy | 3 + margin_logs/step_0000518.npy | 3 + margin_logs/step_0000519.npy | 3 + margin_logs/step_0000520.npy | 3 + margin_logs/step_0000521.npy | 3 + margin_logs/step_0000522.npy | 3 + margin_logs/step_0000523.npy | 3 + margin_logs/step_0000524.npy | 3 + margin_logs/step_0000525.npy | 3 + margin_logs/step_0000526.npy | 3 + margin_logs/step_0000527.npy | 3 + margin_logs/step_0000528.npy | 3 + margin_logs/step_0000529.npy | 3 + margin_logs/step_0000530.npy | 3 + margin_logs/step_0000531.npy | 3 + margin_logs/step_0000532.npy | 3 + margin_logs/step_0000533.npy | 3 + margin_logs/step_0000534.npy | 3 + margin_logs/step_0000535.npy | 3 + margin_logs/step_0000536.npy | 3 + margin_logs/step_0000537.npy | 3 + margin_logs/step_0000538.npy | 3 + margin_logs/step_0000539.npy | 3 + margin_logs/step_0000540.npy | 3 + margin_logs/step_0000541.npy | 3 + margin_logs/step_0000542.npy | 3 + margin_logs/step_0000543.npy | 3 + margin_logs/step_0000544.npy | 3 + margin_logs/step_0000545.npy | 3 + margin_logs/step_0000546.npy | 3 + margin_logs/step_0000547.npy | 3 + margin_logs/step_0000548.npy | 3 + margin_logs/step_0000549.npy | 3 + margin_logs/step_0000550.npy | 3 + margin_logs/step_0000551.npy | 3 + margin_logs/step_0000552.npy | 3 + margin_logs/step_0000553.npy | 3 + margin_logs/step_0000554.npy | 3 + margin_logs/step_0000555.npy | 3 + margin_logs/step_0000556.npy | 3 + margin_logs/step_0000557.npy | 3 + margin_logs/step_0000558.npy | 3 + margin_logs/step_0000559.npy | 3 + margin_logs/step_0000560.npy | 3 + margin_logs/step_0000561.npy | 3 + margin_logs/step_0000562.npy | 3 + margin_logs/step_0000563.npy | 3 + margin_logs/step_0000564.npy | 3 + margin_logs/step_0000565.npy | 3 + margin_logs/step_0000566.npy | 3 + margin_logs/step_0000567.npy | 3 + margin_logs/step_0000568.npy | 3 + margin_logs/step_0000569.npy | 3 + margin_logs/step_0000570.npy | 3 + margin_logs/step_0000571.npy | 3 + margin_logs/step_0000572.npy | 3 + margin_logs/step_0000573.npy | 3 + margin_logs/step_0000574.npy | 3 + margin_logs/step_0000575.npy | 3 + margin_logs/step_0000576.npy | 3 + margin_logs/step_0000577.npy | 3 + margin_logs/step_0000578.npy | 3 + margin_logs/step_0000579.npy | 3 + margin_logs/step_0000580.npy | 3 + margin_logs/step_0000581.npy | 3 + margin_logs/step_0000582.npy | 3 + margin_logs/step_0000583.npy | 3 + margin_logs/step_0000584.npy | 3 + margin_logs/step_0000585.npy | 3 + margin_logs/step_0000586.npy | 3 + margin_logs/step_0000587.npy | 3 + margin_logs/step_0000588.npy | 3 + margin_logs/step_0000589.npy | 3 + margin_logs/step_0000590.npy | 3 + margin_logs/step_0000591.npy | 3 + margin_logs/step_0000592.npy | 3 + margin_logs/step_0000593.npy | 3 + margin_logs/step_0000594.npy | 3 + margin_logs/step_0000595.npy | 3 + margin_logs/step_0000596.npy | 3 + margin_logs/step_0000597.npy | 3 + margin_logs/step_0000598.npy | 3 + margin_logs/step_0000599.npy | 3 + margin_logs/step_0000600.npy | 3 + margin_logs/step_0000601.npy | 3 + margin_logs/step_0000602.npy | 3 + margin_logs/step_0000603.npy | 3 + margin_logs/step_0000604.npy | 3 + margin_logs/step_0000605.npy | 3 + margin_logs/step_0000606.npy | 3 + margin_logs/step_0000607.npy | 3 + margin_logs/step_0000608.npy | 3 + margin_logs/step_0000609.npy | 3 + margin_logs/step_0000610.npy | 3 + margin_logs/step_0000611.npy | 3 + margin_logs/step_0000612.npy | 3 + margin_logs/step_0000613.npy | 3 + margin_logs/step_0000614.npy | 3 + margin_logs/step_0000615.npy | 3 + margin_logs/step_0000616.npy | 3 + margin_logs/step_0000617.npy | 3 + margin_logs/step_0000618.npy | 3 + margin_logs/step_0000619.npy | 3 + margin_logs/step_0000620.npy | 3 + margin_logs/step_0000621.npy | 3 + margin_logs/step_0000622.npy | 3 + margin_logs/step_0000623.npy | 3 + margin_logs/step_0000624.npy | 3 + margin_logs/step_0000625.npy | 3 + margin_logs/step_0000626.npy | 3 + margin_logs/step_0000627.npy | 3 + margin_logs/step_0000628.npy | 3 + margin_logs/step_0000629.npy | 3 + margin_logs/step_0000630.npy | 3 + margin_logs/step_0000631.npy | 3 + margin_logs/step_0000632.npy | 3 + margin_logs/step_0000633.npy | 3 + margin_logs/step_0000634.npy | 3 + margin_logs/step_0000635.npy | 3 + margin_logs/step_0000636.npy | 3 + margin_logs/step_0000637.npy | 3 + margin_logs/step_0000638.npy | 3 + margin_logs/step_0000639.npy | 3 + margin_logs/step_0000640.npy | 3 + margin_logs/step_0000641.npy | 3 + margin_logs/step_0000642.npy | 3 + margin_logs/step_0000643.npy | 3 + margin_logs/step_0000644.npy | 3 + margin_logs/step_0000645.npy | 3 + margin_logs/step_0000646.npy | 3 + margin_logs/step_0000647.npy | 3 + margin_logs/step_0000648.npy | 3 + margin_logs/step_0000649.npy | 3 + margin_logs/step_0000650.npy | 3 + margin_logs/step_0000651.npy | 3 + margin_logs/step_0000652.npy | 3 + margin_logs/step_0000653.npy | 3 + margin_logs/step_0000654.npy | 3 + margin_logs/step_0000655.npy | 3 + margin_logs/step_0000656.npy | 3 + margin_logs/step_0000657.npy | 3 + margin_logs/step_0000658.npy | 3 + margin_logs/step_0000659.npy | 3 + margin_logs/step_0000660.npy | 3 + margin_logs/step_0000661.npy | 3 + margin_logs/step_0000662.npy | 3 + margin_logs/step_0000663.npy | 3 + margin_logs/step_0000664.npy | 3 + margin_logs/step_0000665.npy | 3 + margin_logs/step_0000666.npy | 3 + margin_logs/step_0000667.npy | 3 + margin_logs/step_0000668.npy | 3 + margin_logs/step_0000669.npy | 3 + margin_logs/step_0000670.npy | 3 + margin_logs/step_0000671.npy | 3 + margin_logs/step_0000672.npy | 3 + margin_logs/step_0000673.npy | 3 + margin_logs/step_0000674.npy | 3 + margin_logs/step_0000675.npy | 3 + margin_logs/step_0000676.npy | 3 + margin_logs/step_0000677.npy | 3 + margin_logs/step_0000678.npy | 3 + margin_logs/step_0000679.npy | 3 + margin_logs/step_0000680.npy | 3 + margin_logs/step_0000681.npy | 3 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 ++++ train.log | 1739 ++++ train_results.json | 9 + trainer_state.json | 14476 +++++++++++++++++++++++++++++ 702 files changed, 21567 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 margin_logs/step_0000001.npy create mode 100644 margin_logs/step_0000002.npy create mode 100644 margin_logs/step_0000003.npy create mode 100644 margin_logs/step_0000004.npy create mode 100644 margin_logs/step_0000005.npy create mode 100644 margin_logs/step_0000006.npy create mode 100644 margin_logs/step_0000007.npy create mode 100644 margin_logs/step_0000008.npy create mode 100644 margin_logs/step_0000009.npy create mode 100644 margin_logs/step_0000010.npy create mode 100644 margin_logs/step_0000011.npy create mode 100644 margin_logs/step_0000012.npy create mode 100644 margin_logs/step_0000013.npy create mode 100644 margin_logs/step_0000014.npy create mode 100644 margin_logs/step_0000015.npy create mode 100644 margin_logs/step_0000016.npy create mode 100644 margin_logs/step_0000017.npy create mode 100644 margin_logs/step_0000018.npy create mode 100644 margin_logs/step_0000019.npy create mode 100644 margin_logs/step_0000020.npy create mode 100644 margin_logs/step_0000021.npy create mode 100644 margin_logs/step_0000022.npy create mode 100644 margin_logs/step_0000023.npy create mode 100644 margin_logs/step_0000024.npy create mode 100644 margin_logs/step_0000025.npy create mode 100644 margin_logs/step_0000026.npy create mode 100644 margin_logs/step_0000027.npy create mode 100644 margin_logs/step_0000028.npy create mode 100644 margin_logs/step_0000029.npy create mode 100644 margin_logs/step_0000030.npy create mode 100644 margin_logs/step_0000031.npy create mode 100644 margin_logs/step_0000032.npy create mode 100644 margin_logs/step_0000033.npy create mode 100644 margin_logs/step_0000034.npy create mode 100644 margin_logs/step_0000035.npy create mode 100644 margin_logs/step_0000036.npy create mode 100644 margin_logs/step_0000037.npy create mode 100644 margin_logs/step_0000038.npy create mode 100644 margin_logs/step_0000039.npy create mode 100644 margin_logs/step_0000040.npy create mode 100644 margin_logs/step_0000041.npy create mode 100644 margin_logs/step_0000042.npy create mode 100644 margin_logs/step_0000043.npy create mode 100644 margin_logs/step_0000044.npy create mode 100644 margin_logs/step_0000045.npy create mode 100644 margin_logs/step_0000046.npy create mode 100644 margin_logs/step_0000047.npy create mode 100644 margin_logs/step_0000048.npy create mode 100644 margin_logs/step_0000049.npy create mode 100644 margin_logs/step_0000050.npy create mode 100644 margin_logs/step_0000051.npy create mode 100644 margin_logs/step_0000052.npy create mode 100644 margin_logs/step_0000053.npy create mode 100644 margin_logs/step_0000054.npy create mode 100644 margin_logs/step_0000055.npy create mode 100644 margin_logs/step_0000056.npy create mode 100644 margin_logs/step_0000057.npy create mode 100644 margin_logs/step_0000058.npy create mode 100644 margin_logs/step_0000059.npy create mode 100644 margin_logs/step_0000060.npy create mode 100644 margin_logs/step_0000061.npy create mode 100644 margin_logs/step_0000062.npy create mode 100644 margin_logs/step_0000063.npy create mode 100644 margin_logs/step_0000064.npy create mode 100644 margin_logs/step_0000065.npy create mode 100644 margin_logs/step_0000066.npy create mode 100644 margin_logs/step_0000067.npy create mode 100644 margin_logs/step_0000068.npy create mode 100644 margin_logs/step_0000069.npy create mode 100644 margin_logs/step_0000070.npy create mode 100644 margin_logs/step_0000071.npy create mode 100644 margin_logs/step_0000072.npy create mode 100644 margin_logs/step_0000073.npy create mode 100644 margin_logs/step_0000074.npy create mode 100644 margin_logs/step_0000075.npy create mode 100644 margin_logs/step_0000076.npy create mode 100644 margin_logs/step_0000077.npy create mode 100644 margin_logs/step_0000078.npy create mode 100644 margin_logs/step_0000079.npy create mode 100644 margin_logs/step_0000080.npy create mode 100644 margin_logs/step_0000081.npy create mode 100644 margin_logs/step_0000082.npy create mode 100644 margin_logs/step_0000083.npy create mode 100644 margin_logs/step_0000084.npy create mode 100644 margin_logs/step_0000085.npy create mode 100644 margin_logs/step_0000086.npy create mode 100644 margin_logs/step_0000087.npy create mode 100644 margin_logs/step_0000088.npy create mode 100644 margin_logs/step_0000089.npy create mode 100644 margin_logs/step_0000090.npy create mode 100644 margin_logs/step_0000091.npy create mode 100644 margin_logs/step_0000092.npy create mode 100644 margin_logs/step_0000093.npy create mode 100644 margin_logs/step_0000094.npy create mode 100644 margin_logs/step_0000095.npy create mode 100644 margin_logs/step_0000096.npy create mode 100644 margin_logs/step_0000097.npy create mode 100644 margin_logs/step_0000098.npy create mode 100644 margin_logs/step_0000099.npy create mode 100644 margin_logs/step_0000100.npy create mode 100644 margin_logs/step_0000101.npy create mode 100644 margin_logs/step_0000102.npy create mode 100644 margin_logs/step_0000103.npy create mode 100644 margin_logs/step_0000104.npy create mode 100644 margin_logs/step_0000105.npy create mode 100644 margin_logs/step_0000106.npy create mode 100644 margin_logs/step_0000107.npy create mode 100644 margin_logs/step_0000108.npy create mode 100644 margin_logs/step_0000109.npy create mode 100644 margin_logs/step_0000110.npy create mode 100644 margin_logs/step_0000111.npy create mode 100644 margin_logs/step_0000112.npy create mode 100644 margin_logs/step_0000113.npy create mode 100644 margin_logs/step_0000114.npy create mode 100644 margin_logs/step_0000115.npy create mode 100644 margin_logs/step_0000116.npy create mode 100644 margin_logs/step_0000117.npy create mode 100644 margin_logs/step_0000118.npy create mode 100644 margin_logs/step_0000119.npy create mode 100644 margin_logs/step_0000120.npy create mode 100644 margin_logs/step_0000121.npy create mode 100644 margin_logs/step_0000122.npy create mode 100644 margin_logs/step_0000123.npy create mode 100644 margin_logs/step_0000124.npy create mode 100644 margin_logs/step_0000125.npy create mode 100644 margin_logs/step_0000126.npy create mode 100644 margin_logs/step_0000127.npy create mode 100644 margin_logs/step_0000128.npy create mode 100644 margin_logs/step_0000129.npy create mode 100644 margin_logs/step_0000130.npy create mode 100644 margin_logs/step_0000131.npy create mode 100644 margin_logs/step_0000132.npy create mode 100644 margin_logs/step_0000133.npy create mode 100644 margin_logs/step_0000134.npy create mode 100644 margin_logs/step_0000135.npy create mode 100644 margin_logs/step_0000136.npy create mode 100644 margin_logs/step_0000137.npy create mode 100644 margin_logs/step_0000138.npy create mode 100644 margin_logs/step_0000139.npy create mode 100644 margin_logs/step_0000140.npy create mode 100644 margin_logs/step_0000141.npy create mode 100644 margin_logs/step_0000142.npy create mode 100644 margin_logs/step_0000143.npy create mode 100644 margin_logs/step_0000144.npy create mode 100644 margin_logs/step_0000145.npy create mode 100644 margin_logs/step_0000146.npy create mode 100644 margin_logs/step_0000147.npy create mode 100644 margin_logs/step_0000148.npy create mode 100644 margin_logs/step_0000149.npy create mode 100644 margin_logs/step_0000150.npy create mode 100644 margin_logs/step_0000151.npy create mode 100644 margin_logs/step_0000152.npy create mode 100644 margin_logs/step_0000153.npy create mode 100644 margin_logs/step_0000154.npy create mode 100644 margin_logs/step_0000155.npy create mode 100644 margin_logs/step_0000156.npy create mode 100644 margin_logs/step_0000157.npy create mode 100644 margin_logs/step_0000158.npy create mode 100644 margin_logs/step_0000159.npy create mode 100644 margin_logs/step_0000160.npy create mode 100644 margin_logs/step_0000161.npy create mode 100644 margin_logs/step_0000162.npy create mode 100644 margin_logs/step_0000163.npy create mode 100644 margin_logs/step_0000164.npy create mode 100644 margin_logs/step_0000165.npy create mode 100644 margin_logs/step_0000166.npy create mode 100644 margin_logs/step_0000167.npy create mode 100644 margin_logs/step_0000168.npy create mode 100644 margin_logs/step_0000169.npy create mode 100644 margin_logs/step_0000170.npy create mode 100644 margin_logs/step_0000171.npy create mode 100644 margin_logs/step_0000172.npy create mode 100644 margin_logs/step_0000173.npy create mode 100644 margin_logs/step_0000174.npy create mode 100644 margin_logs/step_0000175.npy create mode 100644 margin_logs/step_0000176.npy create mode 100644 margin_logs/step_0000177.npy create mode 100644 margin_logs/step_0000178.npy create mode 100644 margin_logs/step_0000179.npy create mode 100644 margin_logs/step_0000180.npy create mode 100644 margin_logs/step_0000181.npy create mode 100644 margin_logs/step_0000182.npy create mode 100644 margin_logs/step_0000183.npy create mode 100644 margin_logs/step_0000184.npy create mode 100644 margin_logs/step_0000185.npy create mode 100644 margin_logs/step_0000186.npy create mode 100644 margin_logs/step_0000187.npy create mode 100644 margin_logs/step_0000188.npy create mode 100644 margin_logs/step_0000189.npy create mode 100644 margin_logs/step_0000190.npy create mode 100644 margin_logs/step_0000191.npy create mode 100644 margin_logs/step_0000192.npy create mode 100644 margin_logs/step_0000193.npy create mode 100644 margin_logs/step_0000194.npy create mode 100644 margin_logs/step_0000195.npy create mode 100644 margin_logs/step_0000196.npy create mode 100644 margin_logs/step_0000197.npy create mode 100644 margin_logs/step_0000198.npy create mode 100644 margin_logs/step_0000199.npy create mode 100644 margin_logs/step_0000200.npy create mode 100644 margin_logs/step_0000201.npy create mode 100644 margin_logs/step_0000202.npy create mode 100644 margin_logs/step_0000203.npy create mode 100644 margin_logs/step_0000204.npy create mode 100644 margin_logs/step_0000205.npy create mode 100644 margin_logs/step_0000206.npy create mode 100644 margin_logs/step_0000207.npy create mode 100644 margin_logs/step_0000208.npy create mode 100644 margin_logs/step_0000209.npy create mode 100644 margin_logs/step_0000210.npy create mode 100644 margin_logs/step_0000211.npy create mode 100644 margin_logs/step_0000212.npy create mode 100644 margin_logs/step_0000213.npy create mode 100644 margin_logs/step_0000214.npy create mode 100644 margin_logs/step_0000215.npy create mode 100644 margin_logs/step_0000216.npy create mode 100644 margin_logs/step_0000217.npy create mode 100644 margin_logs/step_0000218.npy create mode 100644 margin_logs/step_0000219.npy create mode 100644 margin_logs/step_0000220.npy create mode 100644 margin_logs/step_0000221.npy create mode 100644 margin_logs/step_0000222.npy create mode 100644 margin_logs/step_0000223.npy create mode 100644 margin_logs/step_0000224.npy create mode 100644 margin_logs/step_0000225.npy create mode 100644 margin_logs/step_0000226.npy create mode 100644 margin_logs/step_0000227.npy create mode 100644 margin_logs/step_0000228.npy create mode 100644 margin_logs/step_0000229.npy create mode 100644 margin_logs/step_0000230.npy create mode 100644 margin_logs/step_0000231.npy create mode 100644 margin_logs/step_0000232.npy create mode 100644 margin_logs/step_0000233.npy create mode 100644 margin_logs/step_0000234.npy create mode 100644 margin_logs/step_0000235.npy create mode 100644 margin_logs/step_0000236.npy create mode 100644 margin_logs/step_0000237.npy create mode 100644 margin_logs/step_0000238.npy create mode 100644 margin_logs/step_0000239.npy create mode 100644 margin_logs/step_0000240.npy create mode 100644 margin_logs/step_0000241.npy create mode 100644 margin_logs/step_0000242.npy create mode 100644 margin_logs/step_0000243.npy create mode 100644 margin_logs/step_0000244.npy create mode 100644 margin_logs/step_0000245.npy create mode 100644 margin_logs/step_0000246.npy create mode 100644 margin_logs/step_0000247.npy create mode 100644 margin_logs/step_0000248.npy create mode 100644 margin_logs/step_0000249.npy create mode 100644 margin_logs/step_0000250.npy create mode 100644 margin_logs/step_0000251.npy create mode 100644 margin_logs/step_0000252.npy create mode 100644 margin_logs/step_0000253.npy create mode 100644 margin_logs/step_0000254.npy create mode 100644 margin_logs/step_0000255.npy create mode 100644 margin_logs/step_0000256.npy create mode 100644 margin_logs/step_0000257.npy create mode 100644 margin_logs/step_0000258.npy create mode 100644 margin_logs/step_0000259.npy create mode 100644 margin_logs/step_0000260.npy create mode 100644 margin_logs/step_0000261.npy create mode 100644 margin_logs/step_0000262.npy create mode 100644 margin_logs/step_0000263.npy create mode 100644 margin_logs/step_0000264.npy create mode 100644 margin_logs/step_0000265.npy create mode 100644 margin_logs/step_0000266.npy create mode 100644 margin_logs/step_0000267.npy create mode 100644 margin_logs/step_0000268.npy create mode 100644 margin_logs/step_0000269.npy create mode 100644 margin_logs/step_0000270.npy create mode 100644 margin_logs/step_0000271.npy create mode 100644 margin_logs/step_0000272.npy create mode 100644 margin_logs/step_0000273.npy create mode 100644 margin_logs/step_0000274.npy create mode 100644 margin_logs/step_0000275.npy create mode 100644 margin_logs/step_0000276.npy create mode 100644 margin_logs/step_0000277.npy create mode 100644 margin_logs/step_0000278.npy create mode 100644 margin_logs/step_0000279.npy create mode 100644 margin_logs/step_0000280.npy create mode 100644 margin_logs/step_0000281.npy create mode 100644 margin_logs/step_0000282.npy create mode 100644 margin_logs/step_0000283.npy create mode 100644 margin_logs/step_0000284.npy create mode 100644 margin_logs/step_0000285.npy create mode 100644 margin_logs/step_0000286.npy create mode 100644 margin_logs/step_0000287.npy create mode 100644 margin_logs/step_0000288.npy create mode 100644 margin_logs/step_0000289.npy create mode 100644 margin_logs/step_0000290.npy create mode 100644 margin_logs/step_0000291.npy create mode 100644 margin_logs/step_0000292.npy create mode 100644 margin_logs/step_0000293.npy create mode 100644 margin_logs/step_0000294.npy create mode 100644 margin_logs/step_0000295.npy create mode 100644 margin_logs/step_0000296.npy create mode 100644 margin_logs/step_0000297.npy create mode 100644 margin_logs/step_0000298.npy create mode 100644 margin_logs/step_0000299.npy create mode 100644 margin_logs/step_0000300.npy create mode 100644 margin_logs/step_0000301.npy create mode 100644 margin_logs/step_0000302.npy create mode 100644 margin_logs/step_0000303.npy create mode 100644 margin_logs/step_0000304.npy create mode 100644 margin_logs/step_0000305.npy create mode 100644 margin_logs/step_0000306.npy create mode 100644 margin_logs/step_0000307.npy create mode 100644 margin_logs/step_0000308.npy create mode 100644 margin_logs/step_0000309.npy create mode 100644 margin_logs/step_0000310.npy create mode 100644 margin_logs/step_0000311.npy create mode 100644 margin_logs/step_0000312.npy create mode 100644 margin_logs/step_0000313.npy create mode 100644 margin_logs/step_0000314.npy create mode 100644 margin_logs/step_0000315.npy create mode 100644 margin_logs/step_0000316.npy create mode 100644 margin_logs/step_0000317.npy create mode 100644 margin_logs/step_0000318.npy create mode 100644 margin_logs/step_0000319.npy create mode 100644 margin_logs/step_0000320.npy create mode 100644 margin_logs/step_0000321.npy create mode 100644 margin_logs/step_0000322.npy create mode 100644 margin_logs/step_0000323.npy create mode 100644 margin_logs/step_0000324.npy create mode 100644 margin_logs/step_0000325.npy create mode 100644 margin_logs/step_0000326.npy create mode 100644 margin_logs/step_0000327.npy create mode 100644 margin_logs/step_0000328.npy create mode 100644 margin_logs/step_0000329.npy create mode 100644 margin_logs/step_0000330.npy create mode 100644 margin_logs/step_0000331.npy create mode 100644 margin_logs/step_0000332.npy create mode 100644 margin_logs/step_0000333.npy create mode 100644 margin_logs/step_0000334.npy create mode 100644 margin_logs/step_0000335.npy create mode 100644 margin_logs/step_0000336.npy create mode 100644 margin_logs/step_0000337.npy create mode 100644 margin_logs/step_0000338.npy create mode 100644 margin_logs/step_0000339.npy create mode 100644 margin_logs/step_0000340.npy create mode 100644 margin_logs/step_0000341.npy create mode 100644 margin_logs/step_0000342.npy create mode 100644 margin_logs/step_0000343.npy create mode 100644 margin_logs/step_0000344.npy create mode 100644 margin_logs/step_0000345.npy create mode 100644 margin_logs/step_0000346.npy create mode 100644 margin_logs/step_0000347.npy create mode 100644 margin_logs/step_0000348.npy create mode 100644 margin_logs/step_0000349.npy create mode 100644 margin_logs/step_0000350.npy create mode 100644 margin_logs/step_0000351.npy create mode 100644 margin_logs/step_0000352.npy create mode 100644 margin_logs/step_0000353.npy create mode 100644 margin_logs/step_0000354.npy create mode 100644 margin_logs/step_0000355.npy create mode 100644 margin_logs/step_0000356.npy create mode 100644 margin_logs/step_0000357.npy create mode 100644 margin_logs/step_0000358.npy create mode 100644 margin_logs/step_0000359.npy create mode 100644 margin_logs/step_0000360.npy create mode 100644 margin_logs/step_0000361.npy create mode 100644 margin_logs/step_0000362.npy create mode 100644 margin_logs/step_0000363.npy create mode 100644 margin_logs/step_0000364.npy create mode 100644 margin_logs/step_0000365.npy create mode 100644 margin_logs/step_0000366.npy create mode 100644 margin_logs/step_0000367.npy create mode 100644 margin_logs/step_0000368.npy create mode 100644 margin_logs/step_0000369.npy create mode 100644 margin_logs/step_0000370.npy create mode 100644 margin_logs/step_0000371.npy create mode 100644 margin_logs/step_0000372.npy create mode 100644 margin_logs/step_0000373.npy create mode 100644 margin_logs/step_0000374.npy create mode 100644 margin_logs/step_0000375.npy create mode 100644 margin_logs/step_0000376.npy create mode 100644 margin_logs/step_0000377.npy create mode 100644 margin_logs/step_0000378.npy create mode 100644 margin_logs/step_0000379.npy create mode 100644 margin_logs/step_0000380.npy create mode 100644 margin_logs/step_0000381.npy create mode 100644 margin_logs/step_0000382.npy create mode 100644 margin_logs/step_0000383.npy create mode 100644 margin_logs/step_0000384.npy create mode 100644 margin_logs/step_0000385.npy create mode 100644 margin_logs/step_0000386.npy create mode 100644 margin_logs/step_0000387.npy create mode 100644 margin_logs/step_0000388.npy create mode 100644 margin_logs/step_0000389.npy create mode 100644 margin_logs/step_0000390.npy create mode 100644 margin_logs/step_0000391.npy create mode 100644 margin_logs/step_0000392.npy create mode 100644 margin_logs/step_0000393.npy create mode 100644 margin_logs/step_0000394.npy create mode 100644 margin_logs/step_0000395.npy create mode 100644 margin_logs/step_0000396.npy create mode 100644 margin_logs/step_0000397.npy create mode 100644 margin_logs/step_0000398.npy create mode 100644 margin_logs/step_0000399.npy create mode 100644 margin_logs/step_0000400.npy create mode 100644 margin_logs/step_0000401.npy create mode 100644 margin_logs/step_0000402.npy create mode 100644 margin_logs/step_0000403.npy create mode 100644 margin_logs/step_0000404.npy create mode 100644 margin_logs/step_0000405.npy create mode 100644 margin_logs/step_0000406.npy create mode 100644 margin_logs/step_0000407.npy create mode 100644 margin_logs/step_0000408.npy create mode 100644 margin_logs/step_0000409.npy create mode 100644 margin_logs/step_0000410.npy create mode 100644 margin_logs/step_0000411.npy create mode 100644 margin_logs/step_0000412.npy create mode 100644 margin_logs/step_0000413.npy create mode 100644 margin_logs/step_0000414.npy create mode 100644 margin_logs/step_0000415.npy create mode 100644 margin_logs/step_0000416.npy create mode 100644 margin_logs/step_0000417.npy create mode 100644 margin_logs/step_0000418.npy create mode 100644 margin_logs/step_0000419.npy create mode 100644 margin_logs/step_0000420.npy create mode 100644 margin_logs/step_0000421.npy create mode 100644 margin_logs/step_0000422.npy create mode 100644 margin_logs/step_0000423.npy create mode 100644 margin_logs/step_0000424.npy create mode 100644 margin_logs/step_0000425.npy create mode 100644 margin_logs/step_0000426.npy create mode 100644 margin_logs/step_0000427.npy create mode 100644 margin_logs/step_0000428.npy create mode 100644 margin_logs/step_0000429.npy create mode 100644 margin_logs/step_0000430.npy create mode 100644 margin_logs/step_0000431.npy create mode 100644 margin_logs/step_0000432.npy create mode 100644 margin_logs/step_0000433.npy create mode 100644 margin_logs/step_0000434.npy create mode 100644 margin_logs/step_0000435.npy create mode 100644 margin_logs/step_0000436.npy create mode 100644 margin_logs/step_0000437.npy create mode 100644 margin_logs/step_0000438.npy create mode 100644 margin_logs/step_0000439.npy create mode 100644 margin_logs/step_0000440.npy create mode 100644 margin_logs/step_0000441.npy create mode 100644 margin_logs/step_0000442.npy create mode 100644 margin_logs/step_0000443.npy create mode 100644 margin_logs/step_0000444.npy create mode 100644 margin_logs/step_0000445.npy create mode 100644 margin_logs/step_0000446.npy create mode 100644 margin_logs/step_0000447.npy create mode 100644 margin_logs/step_0000448.npy create mode 100644 margin_logs/step_0000449.npy create mode 100644 margin_logs/step_0000450.npy create mode 100644 margin_logs/step_0000451.npy create mode 100644 margin_logs/step_0000452.npy create mode 100644 margin_logs/step_0000453.npy create mode 100644 margin_logs/step_0000454.npy create mode 100644 margin_logs/step_0000455.npy create mode 100644 margin_logs/step_0000456.npy create mode 100644 margin_logs/step_0000457.npy create mode 100644 margin_logs/step_0000458.npy create mode 100644 margin_logs/step_0000459.npy create mode 100644 margin_logs/step_0000460.npy create mode 100644 margin_logs/step_0000461.npy create mode 100644 margin_logs/step_0000462.npy create mode 100644 margin_logs/step_0000463.npy create mode 100644 margin_logs/step_0000464.npy create mode 100644 margin_logs/step_0000465.npy create mode 100644 margin_logs/step_0000466.npy create mode 100644 margin_logs/step_0000467.npy create mode 100644 margin_logs/step_0000468.npy create mode 100644 margin_logs/step_0000469.npy create mode 100644 margin_logs/step_0000470.npy create mode 100644 margin_logs/step_0000471.npy create mode 100644 margin_logs/step_0000472.npy create mode 100644 margin_logs/step_0000473.npy create mode 100644 margin_logs/step_0000474.npy create mode 100644 margin_logs/step_0000475.npy create mode 100644 margin_logs/step_0000476.npy create mode 100644 margin_logs/step_0000477.npy create mode 100644 margin_logs/step_0000478.npy create mode 100644 margin_logs/step_0000479.npy create mode 100644 margin_logs/step_0000480.npy create mode 100644 margin_logs/step_0000481.npy create mode 100644 margin_logs/step_0000482.npy create mode 100644 margin_logs/step_0000483.npy create mode 100644 margin_logs/step_0000484.npy create mode 100644 margin_logs/step_0000485.npy create mode 100644 margin_logs/step_0000486.npy create mode 100644 margin_logs/step_0000487.npy create mode 100644 margin_logs/step_0000488.npy create mode 100644 margin_logs/step_0000489.npy create mode 100644 margin_logs/step_0000490.npy create mode 100644 margin_logs/step_0000491.npy create mode 100644 margin_logs/step_0000492.npy create mode 100644 margin_logs/step_0000493.npy create mode 100644 margin_logs/step_0000494.npy create mode 100644 margin_logs/step_0000495.npy create mode 100644 margin_logs/step_0000496.npy create mode 100644 margin_logs/step_0000497.npy create mode 100644 margin_logs/step_0000498.npy create mode 100644 margin_logs/step_0000499.npy create mode 100644 margin_logs/step_0000500.npy create mode 100644 margin_logs/step_0000501.npy create mode 100644 margin_logs/step_0000502.npy create mode 100644 margin_logs/step_0000503.npy create mode 100644 margin_logs/step_0000504.npy create mode 100644 margin_logs/step_0000505.npy create mode 100644 margin_logs/step_0000506.npy create mode 100644 margin_logs/step_0000507.npy create mode 100644 margin_logs/step_0000508.npy create mode 100644 margin_logs/step_0000509.npy create mode 100644 margin_logs/step_0000510.npy create mode 100644 margin_logs/step_0000511.npy create mode 100644 margin_logs/step_0000512.npy create mode 100644 margin_logs/step_0000513.npy create mode 100644 margin_logs/step_0000514.npy create mode 100644 margin_logs/step_0000515.npy create mode 100644 margin_logs/step_0000516.npy create mode 100644 margin_logs/step_0000517.npy create mode 100644 margin_logs/step_0000518.npy create mode 100644 margin_logs/step_0000519.npy create mode 100644 margin_logs/step_0000520.npy create mode 100644 margin_logs/step_0000521.npy create mode 100644 margin_logs/step_0000522.npy create mode 100644 margin_logs/step_0000523.npy create mode 100644 margin_logs/step_0000524.npy create mode 100644 margin_logs/step_0000525.npy create mode 100644 margin_logs/step_0000526.npy create mode 100644 margin_logs/step_0000527.npy create mode 100644 margin_logs/step_0000528.npy create mode 100644 margin_logs/step_0000529.npy create mode 100644 margin_logs/step_0000530.npy create mode 100644 margin_logs/step_0000531.npy create mode 100644 margin_logs/step_0000532.npy create mode 100644 margin_logs/step_0000533.npy create mode 100644 margin_logs/step_0000534.npy create mode 100644 margin_logs/step_0000535.npy create mode 100644 margin_logs/step_0000536.npy create mode 100644 margin_logs/step_0000537.npy create mode 100644 margin_logs/step_0000538.npy create mode 100644 margin_logs/step_0000539.npy create mode 100644 margin_logs/step_0000540.npy create mode 100644 margin_logs/step_0000541.npy create mode 100644 margin_logs/step_0000542.npy create mode 100644 margin_logs/step_0000543.npy create mode 100644 margin_logs/step_0000544.npy create mode 100644 margin_logs/step_0000545.npy create mode 100644 margin_logs/step_0000546.npy create mode 100644 margin_logs/step_0000547.npy create mode 100644 margin_logs/step_0000548.npy create mode 100644 margin_logs/step_0000549.npy create mode 100644 margin_logs/step_0000550.npy create mode 100644 margin_logs/step_0000551.npy create mode 100644 margin_logs/step_0000552.npy create mode 100644 margin_logs/step_0000553.npy create mode 100644 margin_logs/step_0000554.npy create mode 100644 margin_logs/step_0000555.npy create mode 100644 margin_logs/step_0000556.npy create mode 100644 margin_logs/step_0000557.npy create mode 100644 margin_logs/step_0000558.npy create mode 100644 margin_logs/step_0000559.npy create mode 100644 margin_logs/step_0000560.npy create mode 100644 margin_logs/step_0000561.npy create mode 100644 margin_logs/step_0000562.npy create mode 100644 margin_logs/step_0000563.npy create mode 100644 margin_logs/step_0000564.npy create mode 100644 margin_logs/step_0000565.npy create mode 100644 margin_logs/step_0000566.npy create mode 100644 margin_logs/step_0000567.npy create mode 100644 margin_logs/step_0000568.npy create mode 100644 margin_logs/step_0000569.npy create mode 100644 margin_logs/step_0000570.npy create mode 100644 margin_logs/step_0000571.npy create mode 100644 margin_logs/step_0000572.npy create mode 100644 margin_logs/step_0000573.npy create mode 100644 margin_logs/step_0000574.npy create mode 100644 margin_logs/step_0000575.npy create mode 100644 margin_logs/step_0000576.npy create mode 100644 margin_logs/step_0000577.npy create mode 100644 margin_logs/step_0000578.npy create mode 100644 margin_logs/step_0000579.npy create mode 100644 margin_logs/step_0000580.npy create mode 100644 margin_logs/step_0000581.npy create mode 100644 margin_logs/step_0000582.npy create mode 100644 margin_logs/step_0000583.npy create mode 100644 margin_logs/step_0000584.npy create mode 100644 margin_logs/step_0000585.npy create mode 100644 margin_logs/step_0000586.npy create mode 100644 margin_logs/step_0000587.npy create mode 100644 margin_logs/step_0000588.npy create mode 100644 margin_logs/step_0000589.npy create mode 100644 margin_logs/step_0000590.npy create mode 100644 margin_logs/step_0000591.npy create mode 100644 margin_logs/step_0000592.npy create mode 100644 margin_logs/step_0000593.npy create mode 100644 margin_logs/step_0000594.npy create mode 100644 margin_logs/step_0000595.npy create mode 100644 margin_logs/step_0000596.npy create mode 100644 margin_logs/step_0000597.npy create mode 100644 margin_logs/step_0000598.npy create mode 100644 margin_logs/step_0000599.npy create mode 100644 margin_logs/step_0000600.npy create mode 100644 margin_logs/step_0000601.npy create mode 100644 margin_logs/step_0000602.npy create mode 100644 margin_logs/step_0000603.npy create mode 100644 margin_logs/step_0000604.npy create mode 100644 margin_logs/step_0000605.npy create mode 100644 margin_logs/step_0000606.npy create mode 100644 margin_logs/step_0000607.npy create mode 100644 margin_logs/step_0000608.npy create mode 100644 margin_logs/step_0000609.npy create mode 100644 margin_logs/step_0000610.npy create mode 100644 margin_logs/step_0000611.npy create mode 100644 margin_logs/step_0000612.npy create mode 100644 margin_logs/step_0000613.npy create mode 100644 margin_logs/step_0000614.npy create mode 100644 margin_logs/step_0000615.npy create mode 100644 margin_logs/step_0000616.npy create mode 100644 margin_logs/step_0000617.npy create mode 100644 margin_logs/step_0000618.npy create mode 100644 margin_logs/step_0000619.npy create mode 100644 margin_logs/step_0000620.npy create mode 100644 margin_logs/step_0000621.npy create mode 100644 margin_logs/step_0000622.npy create mode 100644 margin_logs/step_0000623.npy create mode 100644 margin_logs/step_0000624.npy create mode 100644 margin_logs/step_0000625.npy create mode 100644 margin_logs/step_0000626.npy create mode 100644 margin_logs/step_0000627.npy create mode 100644 margin_logs/step_0000628.npy create mode 100644 margin_logs/step_0000629.npy create mode 100644 margin_logs/step_0000630.npy create mode 100644 margin_logs/step_0000631.npy create mode 100644 margin_logs/step_0000632.npy create mode 100644 margin_logs/step_0000633.npy create mode 100644 margin_logs/step_0000634.npy create mode 100644 margin_logs/step_0000635.npy create mode 100644 margin_logs/step_0000636.npy create mode 100644 margin_logs/step_0000637.npy create mode 100644 margin_logs/step_0000638.npy create mode 100644 margin_logs/step_0000639.npy create mode 100644 margin_logs/step_0000640.npy create mode 100644 margin_logs/step_0000641.npy create mode 100644 margin_logs/step_0000642.npy create mode 100644 margin_logs/step_0000643.npy create mode 100644 margin_logs/step_0000644.npy create mode 100644 margin_logs/step_0000645.npy create mode 100644 margin_logs/step_0000646.npy create mode 100644 margin_logs/step_0000647.npy create mode 100644 margin_logs/step_0000648.npy create mode 100644 margin_logs/step_0000649.npy create mode 100644 margin_logs/step_0000650.npy create mode 100644 margin_logs/step_0000651.npy create mode 100644 margin_logs/step_0000652.npy create mode 100644 margin_logs/step_0000653.npy create mode 100644 margin_logs/step_0000654.npy create mode 100644 margin_logs/step_0000655.npy create mode 100644 margin_logs/step_0000656.npy create mode 100644 margin_logs/step_0000657.npy create mode 100644 margin_logs/step_0000658.npy create mode 100644 margin_logs/step_0000659.npy create mode 100644 margin_logs/step_0000660.npy create mode 100644 margin_logs/step_0000661.npy create mode 100644 margin_logs/step_0000662.npy create mode 100644 margin_logs/step_0000663.npy create mode 100644 margin_logs/step_0000664.npy create mode 100644 margin_logs/step_0000665.npy create mode 100644 margin_logs/step_0000666.npy create mode 100644 margin_logs/step_0000667.npy create mode 100644 margin_logs/step_0000668.npy create mode 100644 margin_logs/step_0000669.npy create mode 100644 margin_logs/step_0000670.npy create mode 100644 margin_logs/step_0000671.npy create mode 100644 margin_logs/step_0000672.npy create mode 100644 margin_logs/step_0000673.npy create mode 100644 margin_logs/step_0000674.npy create mode 100644 margin_logs/step_0000675.npy create mode 100644 margin_logs/step_0000676.npy create mode 100644 margin_logs/step_0000677.npy create mode 100644 margin_logs/step_0000678.npy create mode 100644 margin_logs/step_0000679.npy create mode 100644 margin_logs/step_0000680.npy create mode 100644 margin_logs/step_0000681.npy create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.log create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..1beccbe --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +--- +library_name: transformers +base_model: llama-3-8b-base-sft-hh-helpful-4xh200-batch-64 +tags: +- alignment-handbook +- margin-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 + results: [] +--- + + + +# llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 + +This model is a fine-tuned version of [llama-3-8b-base-sft-hh-helpful-4xh200-batch-64](https://huggingface.co/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64) on the Anthropic/hh-rlhf dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4055 +- Margin Dpo/beta: 0.1000 +- Margin Dpo/loss Margin Mean: 21.7395 +- Margin Dpo/beta Margin Mean: 2.1740 +- Margin Dpo/beta Margin Std: 2.6342 +- Margin Dpo/beta Margin Grad Mean: -0.2573 +- Margin Dpo/beta Margin Grad Std: 0.2541 +- Margin Dpo/margin Mean: 21.7395 +- Margin Dpo/margin Std: 26.3422 +- Logps/chosen: -105.8801 +- Logps/rejected: -135.3665 +- Logps/ref Chosen: -79.0510 +- Logps/ref Rejected: -86.7979 +- Logits/chosen: -0.6200 +- Logits/rejected: -0.5940 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Margin Dpo/beta | Margin Dpo/loss Margin Mean | Margin Dpo/beta Margin Mean | Margin Dpo/beta Margin Std | Margin Dpo/beta Margin Grad Mean | Margin Dpo/beta Margin Grad Std | Margin Dpo/margin Mean | Margin Dpo/margin Std | Logps/chosen | Logps/rejected | Logps/ref Chosen | Logps/ref Rejected | Logits/chosen | Logits/rejected | +|:-------------:|:------:|:----:|:---------------:|:---------------:|:---------------------------:|:---------------------------:|:--------------------------:|:--------------------------------:|:-------------------------------:|:----------------------:|:---------------------:|:------------:|:--------------:|:----------------:|:------------------:|:-------------:|:---------------:| +| 0.9045 | 0.1468 | 100 | 0.5612 | 0.1000 | 8.4438 | 0.8444 | 1.5441 | -0.3672 | 0.2312 | 8.4438 | 15.4407 | -87.2143 | -103.4049 | -79.0510 | -86.7979 | -0.6444 | -0.6169 | +| 0.6573 | 0.2937 | 200 | 0.4777 | 0.1000 | 14.6731 | 1.4673 | 2.1103 | -0.3106 | 0.2466 | 14.6731 | 21.1032 | -92.9744 | -115.3944 | -79.0510 | -86.7979 | -0.6438 | -0.6210 | +| 0.7096 | 0.4405 | 300 | 0.4405 | 0.1000 | 18.1127 | 1.8113 | 2.3747 | -0.2825 | 0.2514 | 18.1127 | 23.7469 | -100.1293 | -125.9889 | -79.0510 | -86.7979 | -0.6160 | -0.5900 | +| 0.4494 | 0.5874 | 400 | 0.4219 | 0.1000 | 20.1798 | 2.0180 | 2.5367 | -0.2694 | 0.2538 | 20.1798 | 25.3668 | -101.8411 | -129.7678 | -79.0510 | -86.7979 | -0.6053 | -0.5765 | +| 0.3799 | 0.7342 | 500 | 0.4100 | 0.1000 | 21.6333 | 2.1633 | 2.6378 | -0.2586 | 0.2554 | 21.6333 | 26.3782 | -106.3940 | -135.7742 | -79.0510 | -86.7979 | -0.6186 | -0.5922 | +| 0.4868 | 0.8811 | 600 | 0.4055 | 0.1000 | 21.7395 | 2.1740 | 2.6342 | -0.2573 | 0.2541 | 21.7395 | 26.3422 | -105.8801 | -135.3665 | -79.0510 | -86.7979 | -0.6200 | -0.5940 | + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..4771d23 --- /dev/null +++ b/all_results.json @@ -0,0 +1,28 @@ +{ + "epoch": 1.0, + "eval_logits/chosen": -0.5923872590065002, + "eval_logits/rejected": -0.5646775364875793, + "eval_logps/chosen": -106.05509948730469, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -135.59478759765625, + "eval_loss": 0.40463200211524963, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.25669676065444946, + "eval_margin_dpo/beta_margin_grad_std": 0.2539878487586975, + "eval_margin_dpo/beta_margin_mean": 2.1792807579040527, + "eval_margin_dpo/beta_margin_std": 2.6363675594329834, + "eval_margin_dpo/loss_margin_mean": 21.79280662536621, + "eval_margin_dpo/margin_mean": 21.79280662536621, + "eval_margin_dpo/margin_std": 26.363676071166992, + "eval_runtime": 40.0533, + "eval_samples": 2339, + "eval_samples_per_second": 58.397, + "eval_steps_per_second": 1.848, + "total_flos": 0.0, + "train_loss": 0.5730435011495403, + "train_runtime": 3273.0613, + "train_samples": 43598, + "train_samples_per_second": 13.32, + "train_steps_per_second": 0.208 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..e0086bb --- /dev/null +++ b/eval_results.json @@ -0,0 +1,22 @@ +{ + "epoch": 1.0, + "eval_logits/chosen": -0.5923872590065002, + "eval_logits/rejected": -0.5646775364875793, + "eval_logps/chosen": -106.05509948730469, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -135.59478759765625, + "eval_loss": 0.40463200211524963, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.25669676065444946, + "eval_margin_dpo/beta_margin_grad_std": 0.2539878487586975, + "eval_margin_dpo/beta_margin_mean": 2.1792807579040527, + "eval_margin_dpo/beta_margin_std": 2.6363675594329834, + "eval_margin_dpo/loss_margin_mean": 21.79280662536621, + "eval_margin_dpo/margin_mean": 21.79280662536621, + "eval_margin_dpo/margin_std": 26.363676071166992, + "eval_runtime": 40.0533, + "eval_samples": 2339, + "eval_samples_per_second": 58.397, + "eval_steps_per_second": 1.848 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..c443cee --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,681 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000001.npy"} +{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000002.npy"} +{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.01640373468399048, "std": 0.33150625228881836, "min": -0.82525634765625, "p10": -0.47623100280761715, "median": -0.032321929931640625, "p90": 0.46041240692138685, "max": 0.7672195434570312, "pos_frac": 0.46875, "sample": [-0.2111034393310547, -0.44612884521484375, -0.00292205810546875, 0.7672195434570312, -0.2964191436767578, 0.008512496948242188, 0.2647132873535156, 0.013631820678710938, -0.1563262939453125, 0.009889602661132812, -0.82525634765625, 0.1518840789794922, -0.1895904541015625, -0.329742431640625, -0.08674240112304688, -0.50775146484375, 0.11066436767578125, -0.11901473999023438, 0.214202880859375, 0.08998489379882812, -0.6874618530273438, -0.04742431640625, 0.7362594604492188, -0.1116180419921875, 0.32257080078125, 0.05938720703125, 0.11513137817382812, 0.4285144805908203, 0.18306350708007812, -0.34081268310546875, 0.5052871704101562, 0.62109375, 0.15777206420898438, -0.051849365234375, 0.06997108459472656, -0.0801239013671875, -0.499176025390625, -0.20870208740234375, -0.08841705322265625, -0.09004974365234375, -0.190826416015625, 0.10590362548828125, -0.4908599853515625, -0.02978515625, 0.14806365966796875, -0.10419464111328125, -0.03485870361328125, 0.5008926391601562, -0.4891319274902344, -0.10234832763671875, 0.18095970153808594, -0.3004341125488281, 0.18291091918945312, 0.06221771240234375, -0.105560302734375, 0.27085113525390625, 0.47408294677734375, -0.38299560546875, -0.2723388671875, 0.6761932373046875, 0.012342453002929688, 0.078887939453125, -0.0719146728515625, -0.6210174560546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000003.npy"} +{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": 0.0101853609085083, "std": 0.40870770812034607, "min": -1.098907470703125, "p10": -0.48458175659179686, "median": 0.02813720703125, "p90": 0.5255672454833985, "max": 0.9482345581054688, "pos_frac": 0.53125, "sample": [-0.4273338317871094, -0.22176742553710938, 0.10195159912109375, -0.05615234375, -0.38072967529296875, -0.2880859375, 0.8399810791015625, -1.098907470703125, -0.2165679931640625, -0.4877471923828125, -0.09852027893066406, 0.5303840637207031, 0.09703826904296875, 0.37158203125, 0.0494842529296875, -0.08495903015136719, -0.528533935546875, -0.20465469360351562, -0.08935165405273438, 0.08026313781738281, -0.45526885986328125, 0.0806427001953125, 0.138702392578125, -0.446441650390625, 0.390655517578125, -0.2508087158203125, 0.14089202880859375, 0.44715118408203125, -0.06812095642089844, -0.84954833984375, -0.02581024169921875, -0.08330535888671875, -0.2428741455078125, 0.43235015869140625, 0.023534774780273438, 0.38907623291015625, -0.05751800537109375, 0.7156219482421875, 0.3605537414550781, 0.1596221923828125, -0.47719573974609375, -0.18732452392578125, 0.29753875732421875, 0.7125701904296875, 0.103973388671875, 0.0317535400390625, 0.2245941162109375, 0.283355712890625, -0.09208297729492188, -0.5011672973632812, 0.5593852996826172, 0.719696044921875, 0.3852806091308594, -0.7695693969726562, 0.0245208740234375, 0.08063316345214844, 0.9482345581054688, -0.1696624755859375, 0.04378509521484375, 0.16285324096679688, 0.5143280029296875, 0.06553459167480469, -0.4234161376953125, -0.572235107421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000004.npy"} +{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.02979910373687744, "std": 0.43392759561538696, "min": -1.1028594970703125, "p10": -0.48680229187011714, "median": 0.011505126953125, "p90": 0.46049728393554695, "max": 1.4181747436523438, "pos_frac": 0.515625, "sample": [-0.14392852783203125, 0.381744384765625, -0.22560882568359375, -0.24718475341796875, 0.7384109497070312, -0.557861328125, 0.3450736999511719, 0.01520538330078125, -0.1266040802001953, -0.123291015625, -0.42937469482421875, 0.441650390625, 0.09767723083496094, 0.15126800537109375, -0.17428970336914062, 0.1509552001953125, -0.23661041259765625, -0.4689674377441406, 0.2988395690917969, -0.5326080322265625, 0.15410995483398438, 0.19065093994140625, -0.09100341796875, 0.46857452392578125, 0.05630302429199219, -0.2080078125, 1.1636581420898438, 0.31301116943359375, -0.244384765625, 0.04825592041015625, -0.04732322692871094, 0.2502326965332031, 0.6631622314453125, 0.08171272277832031, -0.0837860107421875, -0.3185272216796875, 0.48700714111328125, 0.24405288696289062, 0.01763153076171875, 0.42409515380859375, -0.02172088623046875, 0.15001678466796875, 0.29265594482421875, -1.1028594970703125, 1.4181747436523438, -0.3079986572265625, -0.07465362548828125, -0.133514404296875, -0.49444580078125, -0.797515869140625, -0.15690994262695312, -0.6249332427978516, 0.198638916015625, -0.26021575927734375, -0.08252334594726562, 0.00780487060546875, 1.065704345703125, -0.10533905029296875, -0.5180511474609375, 0.4348182678222656, -0.41757965087890625, 0.02445220947265625, 0.2408905029296875, 0.24832534790039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000005.npy"} +{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.0043981969356536865, "std": 0.3865034878253937, "min": -0.95294189453125, "p10": -0.5006668090820312, "median": 0.041072845458984375, "p90": 0.3853317260742188, "max": 1.2647705078125, "pos_frac": 0.546875, "sample": [-0.32056427001953125, 0.18107986450195312, 0.0026073455810546875, 0.1283416748046875, 0.0979461669921875, 0.3321533203125, -0.7009315490722656, -0.4493598937988281, 0.382843017578125, -0.56402587890625, 0.5866889953613281, 0.24840545654296875, -0.95294189453125, 0.2874317169189453, -0.18344497680664062, -0.8419647216796875, -0.089691162109375, -0.24072265625, 0.23504257202148438, 0.03292083740234375, -0.8724365234375, -0.6091880798339844, 0.049224853515625, -0.11350250244140625, -0.11967658996582031, -0.093994140625, 0.3863983154296875, 0.11908721923828125, 0.21823883056640625, -0.14211654663085938, 0.00116729736328125, 0.2959423065185547, 0.6786117553710938, -0.059162139892578125, -0.0280303955078125, 0.224029541015625, -0.3869781494140625, -0.3831939697265625, 0.14795684814453125, 0.28681182861328125, -0.160980224609375, 0.24517822265625, 0.40395355224609375, 0.1644439697265625, 0.177886962890625, 0.24161338806152344, 0.3327770233154297, 0.2385425567626953, 0.422637939453125, -0.4362335205078125, -0.19673919677734375, -0.04328155517578125, 0.1487884521484375, 0.15493011474609375, 0.2411956787109375, -0.10338592529296875, 0.17575454711914062, -0.2304534912109375, -0.17218017578125, 0.5779800415039062, -0.1912841796875, -0.5226554870605469, 1.2647705078125, -0.2227783203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000006.npy"} +{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.01658591628074646, "std": 0.4097239673137665, "min": -0.9780502319335938, "p10": -0.49665374755859376, "median": 0.03392601013183594, "p90": 0.4979003906250001, "max": 1.286529541015625, "pos_frac": 0.546875, "sample": [-0.08548736572265625, -0.39537811279296875, 0.07315444946289062, 0.06892776489257812, -0.9780502319335938, 0.405242919921875, -0.06476974487304688, 0.017822265625, -0.5288543701171875, 0.23825836181640625, -0.384185791015625, 0.6207046508789062, -0.1777496337890625, 0.0109100341796875, 0.3217010498046875, 0.4531402587890625, 0.13829803466796875, -0.1801910400390625, -0.12461280822753906, -0.402252197265625, -0.197601318359375, 0.1327667236328125, 0.5052223205566406, 0.18609237670898438, 0.3572998046875, -0.10807037353515625, -0.24482345581054688, -0.01988983154296875, 0.08131027221679688, 0.041748046875, 0.16432571411132812, 0.039699554443359375, 0.2659721374511719, 0.287445068359375, 0.6437530517578125, -0.316650390625, -0.00302886962890625, 0.07382965087890625, -0.704010009765625, -0.5416336059570312, -0.570648193359375, -0.481475830078125, 0.400146484375, 0.35463714599609375, -0.07279205322265625, 0.106109619140625, 0.4808158874511719, 0.1339874267578125, 0.0281524658203125, -0.29206085205078125, 1.286529541015625, -0.31897735595703125, -0.5031585693359375, 1.1160736083984375, 0.050018310546875, 0.04727935791015625, -0.8425521850585938, 0.5114364624023438, 0.5173835754394531, -0.21199798583984375, 0.2697601318359375, -0.2856597900390625, -0.286163330078125, -0.0457305908203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000007.npy"} +{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.028907448053359985, "std": 0.38289621472358704, "min": -1.013671875, "p10": -0.6849678039550781, "median": 0.016452789306640625, "p90": 0.4017358779907227, "max": 0.6231536865234375, "pos_frac": 0.53125, "sample": [0.0328521728515625, 0.4611053466796875, 0.01796722412109375, -0.008819580078125, -0.6683425903320312, -0.03575325012207031, 0.3965435028076172, 0.6231536865234375, 0.0149383544921875, 0.02257537841796875, -0.07465744018554688, 0.2729301452636719, 0.23681640625, 0.14670562744140625, 0.09227752685546875, 0.2053375244140625, -0.08277130126953125, -0.026882171630859375, 0.3332366943359375, -0.0292205810546875, 0.23574066162109375, -0.7084808349609375, -0.064300537109375, 0.21820068359375, -0.025579452514648438, -0.17706298828125, 0.0821685791015625, 0.3417072296142578, 0.0662689208984375, -0.7982025146484375, 0.29364013671875, -0.6920928955078125, 0.06693267822265625, 0.5344696044921875, 0.1441650390625, 0.149169921875, -0.255706787109375, 0.0915679931640625, -0.001617431640625, 0.379852294921875, -0.24546432495117188, -0.6542892456054688, -0.1901702880859375, -0.2783660888671875, -0.28334808349609375, -1.013671875, -1.0123443603515625, 0.403961181640625, 0.23241424560546875, 0.29299163818359375, -0.8067207336425781, -0.731689453125, -0.08203125, -0.18866729736328125, 0.5494003295898438, -0.18193817138671875, -0.277069091796875, -0.30187225341796875, 0.0109405517578125, -0.227264404296875, 0.14850807189941406, 0.49625396728515625, 0.17012786865234375, 0.5093994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000008.npy"} +{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.011951416730880737, "std": 0.42925095558166504, "min": -0.7931365966796875, "p10": -0.5231101989746094, "median": -0.0162506103515625, "p90": 0.5921791076660157, "max": 1.1223907470703125, "pos_frac": 0.484375, "sample": [-0.33533477783203125, -0.48809051513671875, 0.7401809692382812, 0.5734710693359375, 0.12963104248046875, -0.5185012817382812, 0.6001968383789062, -0.203765869140625, -0.08232879638671875, 0.0623626708984375, -0.711273193359375, 0.182037353515625, -0.071563720703125, 0.004364013671875, -0.2239227294921875, -0.7686767578125, 0.2171630859375, -0.4033164978027344, -0.459136962890625, -0.6567611694335938, -0.7931365966796875, 0.1706562042236328, -0.23957061767578125, 0.05313873291015625, -0.10013580322265625, -0.09357452392578125, 0.11075973510742188, 0.37469482421875, 0.06093406677246094, 0.9036140441894531, 0.042133331298828125, 0.40459632873535156, -0.100494384765625, -0.02325439453125, -0.5252513885498047, -0.3830413818359375, -0.050289154052734375, -0.07569313049316406, 0.78515625, 0.08303451538085938, -0.47925567626953125, -0.597808837890625, -0.52508544921875, 0.09171295166015625, 0.089935302734375, -0.09490203857421875, -0.009246826171875, -0.1472930908203125, 0.893890380859375, 0.12850379943847656, -0.0339813232421875, -0.15255355834960938, 0.237548828125, 0.6620349884033203, 0.48407745361328125, 1.1223907470703125, 0.3302154541015625, 0.48126220703125, 0.20923233032226562, 0.45648193359375, -0.3697052001953125, 0.44454193115234375, -0.4300689697265625, -0.218048095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000009.npy"} +{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.05922728776931763, "std": 0.4276774227619171, "min": -1.809234619140625, "p10": -0.31093311309814453, "median": 0.028291702270507812, "p90": 0.44323539733886724, "max": 1.3619842529296875, "pos_frac": 0.515625, "sample": [0.1492767333984375, 0.2663116455078125, 0.05120277404785156, -0.10093498229980469, -0.2581672668457031, 0.264129638671875, -0.0627593994140625, 0.575042724609375, 0.5890731811523438, -0.4343719482421875, -0.460205078125, 0.425140380859375, -0.7145843505859375, 0.27740478515625, -0.23386383056640625, 0.2330474853515625, -0.6119117736816406, 0.15674209594726562, 0.12831878662109375, 0.09332466125488281, 0.3520698547363281, 0.6863365173339844, 0.3076934814453125, 0.2527904510498047, -0.14337921142578125, -0.09195709228515625, -0.4576416015625, -0.1806793212890625, 0.3683452606201172, -0.0061359405517578125, 0.160858154296875, 0.4324607849121094, -0.09528350830078125, -0.25492095947265625, -0.1353912353515625, -0.20672607421875, -1.809234619140625, 0.3697013854980469, 0.2397289276123047, -0.06263542175292969, -0.3018474578857422, -0.18224334716796875, 0.17069435119628906, 0.75079345703125, 0.3128662109375, -0.21875762939453125, 0.36798095703125, -0.015399932861328125, 0.4298820495605469, -0.1521759033203125, -0.091827392578125, -0.05943107604980469, 0.11089324951171875, 0.24083709716796875, -0.2130889892578125, -0.1064300537109375, -0.0140533447265625, 0.38431549072265625, 0.44785308837890625, 1.3619842529296875, 0.8225784301757812, -0.31482696533203125, 0.0053806304931640625, -0.0036468505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000010.npy"} +{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.04697957634925842, "std": 0.38415780663490295, "min": -0.813446044921875, "p10": -0.48947353363037105, "median": 0.07610893249511719, "p90": 0.46786460876464847, "max": 1.0652923583984375, "pos_frac": 0.546875, "sample": [0.04373931884765625, 0.9750518798828125, 0.39586448669433594, 0.18157386779785156, -0.003025054931640625, -0.0247039794921875, 0.11452484130859375, 0.07768630981445312, -0.11113929748535156, 0.34931182861328125, -0.1962451934814453, 0.1110382080078125, 0.10401153564453125, 0.2312774658203125, -0.813446044921875, 0.08250045776367188, 0.44107818603515625, -0.24770736694335938, -0.229278564453125, 0.08179092407226562, 0.4631061553955078, 0.3653717041015625, 0.090423583984375, -0.14052581787109375, 0.2670135498046875, 0.46509552001953125, -0.055999755859375, 0.09597015380859375, -0.12481689453125, 0.08760833740234375, 0.1780548095703125, -0.50384521484375, 0.099700927734375, -0.177581787109375, 0.4690513610839844, 0.8044281005859375, 0.610748291015625, 1.0652923583984375, -0.6732635498046875, -0.4589214324951172, 0.06176948547363281, 0.21372604370117188, -0.5949134826660156, -0.535858154296875, 0.2673912048339844, -0.013370513916015625, 0.40496826171875, -0.25872230529785156, -0.286224365234375, 0.2828521728515625, -0.2717742919921875, -0.5025672912597656, 0.49764251708984375, -0.5598907470703125, 0.12439727783203125, -0.12111091613769531, -0.352630615234375, -0.1207427978515625, 0.07453155517578125, 0.8794097900390625, -0.23491668701171875, -0.05254554748535156, -0.2314739227294922, -0.1540679931640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000011.npy"} +{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.03697209060192108, "std": 0.3862837553024292, "min": -1.2092437744140625, "p10": -0.400642967224121, "median": 0.037395477294921875, "p90": 0.4642551422119141, "max": 1.49822998046875, "pos_frac": 0.546875, "sample": [0.5009765625, -0.3280487060546875, 0.2237091064453125, -1.2092437744140625, 0.1812744140625, -0.18491363525390625, 0.02790069580078125, 0.16443252563476562, -0.14470672607421875, 0.4172401428222656, 0.0468902587890625, -0.1834869384765625, -0.20619964599609375, -0.05327606201171875, 0.16124725341796875, 0.25212860107421875, -0.33998870849609375, 0.443023681640625, 0.20025253295898438, -0.33847808837890625, -0.252166748046875, -0.13727569580078125, -0.084259033203125, -0.6483840942382812, -0.4399833679199219, 0.12592697143554688, 0.1318359375, 1.49822998046875, -0.2228717803955078, 0.00455474853515625, -0.4975738525390625, 0.47589111328125, 0.2904510498046875, 0.3227043151855469, 0.31633758544921875, 0.22776222229003906, -0.110198974609375, -0.50762939453125, -0.23465728759765625, -0.18487167358398438, -0.6324615478515625, 0.4502601623535156, -0.4266376495361328, 0.4862632751464844, 0.1915130615234375, 0.10385894775390625, 0.287506103515625, -0.09662628173828125, 0.1368570327758789, 0.47025299072265625, -0.04898643493652344, 0.023736953735351562, -0.1199951171875, 0.3751697540283203, 0.058807373046875, -0.09234619140625, 0.05816650390625, -0.2723674774169922, -0.057621002197265625, -0.03347015380859375, 0.35550689697265625, 0.7174720764160156, 0.5589599609375, 0.16783905029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000012.npy"} +{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.0019735991954803467, "std": 0.42141592502593994, "min": -1.0727996826171875, "p10": -0.4444110870361328, "median": -0.037906646728515625, "p90": 0.48707790374755877, "max": 1.5062255859375, "pos_frac": 0.484375, "sample": [-0.1951446533203125, -0.2697868347167969, -0.21967315673828125, -0.2483367919921875, -0.7089767456054688, 0.06937408447265625, 0.6371231079101562, 0.08794021606445312, 0.34210968017578125, -0.22402572631835938, -0.46089935302734375, 0.13953399658203125, -0.6208114624023438, -0.030735015869140625, -0.10294914245605469, 1.5062255859375, -0.229705810546875, -0.4404296875, -0.13364219665527344, -0.324066162109375, 0.13097572326660156, -0.09925079345703125, -0.2651824951171875, 0.11240386962890625, 0.43416404724121094, -0.347320556640625, 0.00920867919921875, -0.13610267639160156, -0.1778717041015625, -0.4224090576171875, -0.4863166809082031, 0.5259933471679688, -0.05609893798828125, 0.24992752075195312, -1.0727996826171875, 0.08640289306640625, -0.19818878173828125, 0.5034961700439453, 0.10600090026855469, 0.017578125, 0.4081306457519531, -0.10482406616210938, 0.56341552734375, 0.20801925659179688, 0.220489501953125, 0.11935806274414062, -0.13698577880859375, 0.037181854248046875, 0.23111724853515625, -0.045078277587890625, -0.59954833984375, 0.5081024169921875, -0.3138275146484375, -0.06905364990234375, -0.36028289794921875, 0.01364898681640625, 0.31551361083984375, 0.0404510498046875, 1.41162109375, -0.4461174011230469, 0.220367431640625, 0.44876861572265625, 0.10567474365234375, -0.13756561279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000013.npy"} +{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.005887240171432495, "std": 0.47432461380958557, "min": -1.7574920654296875, "p10": -0.5578521728515624, "median": 0.05684852600097656, "p90": 0.5454355239868165, "max": 0.9806365966796875, "pos_frac": 0.515625, "sample": [-0.702301025390625, -0.06643104553222656, -0.12323188781738281, -0.3801918029785156, 0.8169097900390625, -0.10453033447265625, -0.59210205078125, -0.342376708984375, 0.26978302001953125, 0.28733062744140625, 0.06116485595703125, -0.17327499389648438, -0.298004150390625, 0.9806365966796875, -0.09050369262695312, -0.145599365234375, -0.1278057098388672, -0.18743133544921875, 0.19170761108398438, -0.40328025817871094, 0.58392333984375, -0.0738525390625, -0.3086700439453125, 0.4253387451171875, 0.4251556396484375, -0.17115402221679688, -1.7574920654296875, 0.10682296752929688, 0.4739227294921875, 0.7368316650390625, 0.237548828125, 0.1344146728515625, 0.30023193359375, 0.21122360229492188, 0.2551860809326172, 0.521484375, 0.12491226196289062, 0.091217041015625, 0.23730850219726562, 0.09773445129394531, -0.613494873046875, -0.1748199462890625, -1.1167755126953125, 0.35277557373046875, -0.17748641967773438, 0.052532196044921875, 0.33838653564453125, -0.2690086364746094, -0.477935791015625, -0.6894073486328125, -0.68414306640625, 0.368316650390625, 0.33673095703125, -0.18532943725585938, 0.33313751220703125, 0.8813629150390625, 0.4001617431640625, 0.697418212890625, -0.4406013488769531, -0.301727294921875, 0.5557003021240234, -0.31392860412597656, 0.16382598876953125, -0.18146324157714844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000014.npy"} +{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.042571812868118286, "std": 0.3996790945529938, "min": -1.4478912353515625, "p10": -0.36165161132812496, "median": 0.03989601135253906, "p90": 0.4236572265625, "max": 1.061248779296875, "pos_frac": 0.59375, "sample": [-0.01983642578125, 1.061248779296875, 0.36983680725097656, -0.13118934631347656, 0.5340576171875, 0.12329673767089844, -0.22081756591796875, -0.59490966796875, 0.343353271484375, 0.24109649658203125, -0.23749542236328125, -0.2243366241455078, 0.22206878662109375, -0.150115966796875, 0.40198326110839844, 0.308135986328125, 0.03014373779296875, -0.04199981689453125, 0.4159393310546875, 0.2230224609375, 0.35283660888671875, -1.4478912353515625, 0.30454254150390625, 0.423614501953125, -0.6949138641357422, 0.18471527099609375, -0.03923797607421875, 0.5951766967773438, -0.7768402099609375, 0.369110107421875, -0.322601318359375, -0.5036239624023438, -0.0906524658203125, 0.007541656494140625, 0.1980438232421875, 0.37091064453125, -0.2776336669921875, 0.6626739501953125, 0.03383636474609375, -0.1868419647216797, -0.09607696533203125, 0.04303550720214844, 0.05637550354003906, 0.03675651550292969, 0.38182830810546875, -0.21947479248046875, 0.13816070556640625, -0.2112598419189453, 0.1972808837890625, 0.00920867919921875, 0.23748016357421875, -0.037387847900390625, -0.2965087890625, 0.0299530029296875, 0.05367279052734375, 0.45801544189453125, 0.7099609375, -0.1402587890625, 0.423675537109375, -0.378387451171875, -0.14821624755859375, -0.8038597106933594, 0.12108230590820312, 0.343292236328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000015.npy"} +{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.01766011118888855, "std": 0.3466501533985138, "min": -0.86865234375, "p10": -0.3907011032104492, "median": 0.06450271606445312, "p90": 0.39504623413085954, "max": 0.8655319213867188, "pos_frac": 0.546875, "sample": [0.18980979919433594, 0.66357421875, -0.6668243408203125, -0.0475921630859375, 0.17347335815429688, -0.15378189086914062, 0.213409423828125, -0.029193878173828125, -0.24835968017578125, 0.31072235107421875, -0.1658172607421875, 0.41345977783203125, 0.7739486694335938, -0.20687103271484375, 0.1680145263671875, -0.123565673828125, -0.4710960388183594, -0.294830322265625, -0.1444091796875, 0.129486083984375, 0.352081298828125, 0.2570629119873047, 0.17861175537109375, -0.08028411865234375, 0.10701179504394531, -0.5972499847412109, -0.86865234375, -0.10491561889648438, -0.08242225646972656, -0.13831520080566406, 0.2745094299316406, 0.0635986328125, 0.085968017578125, -0.3639488220214844, 0.0897979736328125, 0.299835205078125, 0.45807647705078125, -0.3952484130859375, 0.18021392822265625, 0.06528472900390625, 0.2135162353515625, -0.3683624267578125, 0.1837310791015625, 0.068145751953125, 0.47052764892578125, 0.20203781127929688, 0.11396980285644531, -0.38009071350097656, -0.1209259033203125, 0.063720703125, 0.2963829040527344, 0.073028564453125, -0.5402679443359375, 0.17634201049804688, -0.036029815673828125, -0.3296661376953125, 0.026758193969726562, 0.2249755859375, 0.8655319213867188, 0.760955810546875, -0.0105133056640625, -0.2221527099609375, -0.11468887329101562, -0.751251220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000016.npy"} +{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.04995712637901306, "std": 0.3403526842594147, "min": -1.0746307373046875, "p10": -0.38928680419921874, "median": 0.10266304016113281, "p90": 0.46135005950927743, "max": 0.7584075927734375, "pos_frac": 0.5625, "sample": [-0.39589691162109375, 0.11391067504882812, 0.10315704345703125, 0.08905792236328125, -0.4517021179199219, -0.2990150451660156, 0.4117584228515625, -1.0746307373046875, 0.2425994873046875, -0.2827720642089844, 0.264434814453125, 0.4463043212890625, 0.2676239013671875, 0.4098358154296875, -0.05014991760253906, -0.41629791259765625, -0.3270225524902344, -0.06249809265136719, -0.08709335327148438, 0.5214004516601562, -0.151824951171875, 0.10216903686523438, 0.3195533752441406, 0.2023773193359375, -0.37386322021484375, 0.08351898193359375, 0.46779823303222656, 0.02637481689453125, 0.2725067138671875, 0.5368423461914062, -0.17400360107421875, 0.5139389038085938, -0.07216644287109375, -0.20311737060546875, 0.12566375732421875, -0.1335601806640625, -0.2041168212890625, 0.1940593719482422, -0.36452674865722656, 0.2518310546875, 0.7584075927734375, -0.2672615051269531, -0.10518264770507812, 0.17900657653808594, -0.4076385498046875, 0.2266387939453125, 0.2775306701660156, -0.4835968017578125, 0.14487457275390625, -0.02825164794921875, 0.1732940673828125, 0.31543731689453125, 0.4173126220703125, -0.2826385498046875, 0.16281890869140625, -0.1549072265625, 0.69805908203125, -0.15474319458007812, 0.5043487548828125, -0.47869110107421875, 0.37574195861816406, 0.26998138427734375, 0.3222503662109375, -0.10799407958984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000017.npy"} +{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.0720413327217102, "std": 0.34716373682022095, "min": -0.6702194213867188, "p10": -0.4195545196533203, "median": 0.11382770538330078, "p90": 0.46426315307617194, "max": 0.8337326049804688, "pos_frac": 0.625, "sample": [0.15647506713867188, -0.484649658203125, 0.8337326049804688, -0.6702194213867188, 0.4904441833496094, -0.5107192993164062, -0.2493000030517578, 0.1615447998046875, -0.147064208984375, 0.19940757751464844, -0.65216064453125, 0.06730079650878906, 0.34656524658203125, 0.4149818420410156, -0.3384246826171875, 0.7154769897460938, -0.047672271728515625, -0.013235092163085938, 0.24217605590820312, -0.32990264892578125, 0.028720855712890625, -0.11266326904296875, 0.57470703125, 0.09141731262207031, 0.3049888610839844, 0.19597434997558594, 0.6939010620117188, 0.4122161865234375, -0.0227508544921875, 0.15872955322265625, 0.04019927978515625, 0.2205486297607422, 0.39611053466796875, -0.17760467529296875, 0.1887359619140625, -0.3056068420410156, 0.3214111328125, 0.24409866333007812, -0.13922119140625, 0.251312255859375, 0.2051715850830078, 0.15720367431640625, 0.00252532958984375, 0.45044708251953125, 0.470184326171875, 0.0811614990234375, 0.41426849365234375, -0.37943458557128906, -0.292205810546875, 0.5903282165527344, 0.4344329833984375, 0.05542945861816406, -0.550323486328125, -0.4276237487792969, 0.09030914306640625, 0.13623809814453125, 0.3430023193359375, -0.12582778930664062, -0.5130157470703125, 0.36625099182128906, -0.17663002014160156, -0.400726318359375, -0.0260009765625, 0.15549850463867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000018.npy"} +{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.03309273719787598, "std": 0.37767040729522705, "min": -0.919586181640625, "p10": -0.41548614501953124, "median": 0.03559398651123047, "p90": 0.4299766540527344, "max": 1.22125244140625, "pos_frac": 0.546875, "sample": [0.3003101348876953, -0.1262950897216797, -0.1925811767578125, -0.1210784912109375, 0.15720367431640625, -0.549346923828125, 0.28835296630859375, 0.057231903076171875, 0.36476707458496094, 0.87115478515625, 0.040771484375, 0.20681381225585938, 0.3739013671875, -0.1371917724609375, 0.37469482421875, -0.7832794189453125, -0.5169677734375, -0.17616653442382812, -0.551971435546875, 0.1899871826171875, -0.3394775390625, 0.001903533935546875, -0.056537628173828125, -0.4267730712890625, -0.15340232849121094, 0.3958759307861328, 0.381744384765625, -0.419097900390625, -0.26918792724609375, 0.17431640625, -0.25457000732421875, 0.2967243194580078, 0.132476806640625, 0.1977519989013672, 0.4660797119140625, -0.19588851928710938, -0.20223236083984375, 0.12571144104003906, 0.28264617919921875, -0.919586181640625, -0.08292007446289062, -0.2020111083984375, -0.10102081298828125, -0.26336669921875, 0.030416488647460938, 0.39019012451171875, 0.10535430908203125, 0.3272552490234375, 0.2432708740234375, -0.10577964782714844, 0.475311279296875, -0.06653594970703125, 0.31412506103515625, 0.0941619873046875, 0.42288970947265625, 0.016330718994140625, 0.6128311157226562, -0.3283042907714844, -0.4070587158203125, 0.433013916015625, 1.22125244140625, -0.3986701965332031, -0.3443450927734375, 0.44275665283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000019.npy"} +{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.0407865047454834, "std": 0.3044738173484802, "min": -0.8436050415039062, "p10": -0.34591827392578123, "median": 0.04765033721923828, "p90": 0.44367828369140627, "max": 0.69415283203125, "pos_frac": 0.5625, "sample": [0.0401611328125, -0.1297321319580078, -0.013437271118164062, 0.185089111328125, -0.246978759765625, -0.012990951538085938, -0.02562713623046875, 0.4481048583984375, -0.42668914794921875, 0.48529052734375, -0.20827865600585938, 0.17706298828125, 0.5724639892578125, 0.2337207794189453, -0.347381591796875, -0.15204238891601562, -0.4179573059082031, 0.34481048583984375, -0.34444427490234375, 0.433349609375, 0.25765228271484375, -0.244598388671875, -0.08906173706054688, 0.005645751953125, -0.03691864013671875, -0.12346267700195312, 0.04225921630859375, 0.2571830749511719, -0.13786697387695312, -0.16392135620117188, 0.06283950805664062, -0.0994873046875, 0.10725021362304688, 0.11693572998046875, -0.23664283752441406, -0.09218597412109375, 0.576507568359375, -0.8003082275390625, -0.13131332397460938, -0.34654998779296875, 0.69415283203125, -0.153076171875, 0.20285797119140625, 0.22525787353515625, 0.1842803955078125, 0.10416603088378906, 0.3072357177734375, 0.15179443359375, 0.33367156982421875, -0.11705780029296875, -0.8436050415039062, 0.3153839111328125, 0.0039730072021484375, 0.14592742919921875, 0.35906219482421875, -0.4223747253417969, 0.076934814453125, 0.05304145812988281, 0.17299461364746094, -0.03491783142089844, 0.49225616455078125, 0.25035858154296875, 0.5001907348632812, 0.08937835693359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000020.npy"} +{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.11629366874694824, "std": 0.34868374466896057, "min": -0.542572021484375, "p10": -0.3007652282714844, "median": 0.08113384246826172, "p90": 0.584410858154297, "max": 1.082366943359375, "pos_frac": 0.625, "sample": [-0.11867523193359375, 0.10291862487792969, 0.9004364013671875, 0.7556304931640625, -0.541839599609375, 0.04932403564453125, -0.13915061950683594, -0.014606475830078125, 0.408660888671875, 0.1559600830078125, 0.16485595703125, -0.2048492431640625, 0.219207763671875, 0.21224594116210938, 0.24506378173828125, 0.3925628662109375, -0.17135047912597656, 0.2142467498779297, -0.3315620422363281, 0.14999771118164062, -0.33929443359375, -0.3023834228515625, -0.11930084228515625, 0.12373733520507812, -0.1924266815185547, 1.082366943359375, 0.601043701171875, -0.03367424011230469, 0.3176727294921875, 0.16845703125, -0.542572021484375, 0.3697357177734375, -0.05687713623046875, 0.0054340362548828125, -0.00653076171875, 0.0053005218505859375, 0.008026123046875, 0.0272369384765625, 0.4083099365234375, -0.3558807373046875, 0.05934906005859375, 0.2492198944091797, 0.16623687744140625, 0.030611038208007812, -0.044490814208984375, -0.2507190704345703, 0.12377166748046875, 0.0441436767578125, 0.10421180725097656, -0.47550201416015625, 0.11954307556152344, 0.665618896484375, 0.5430793762207031, 0.3914337158203125, 0.35489654541015625, -0.09962844848632812, 0.879913330078125, -0.146392822265625, 0.8197174072265625, 0.3402690887451172, -0.0603790283203125, -0.29698944091796875, 0.5456008911132812, -0.23817825317382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000021.npy"} +{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.2547217905521393, "std": 0.44475993514060974, "min": -0.8624706268310547, "p10": -0.2400030136108398, "median": 0.15310287475585938, "p90": 0.9623382568359375, "max": 1.4829788208007812, "pos_frac": 0.734375, "sample": [0.035923004150390625, 0.980712890625, -0.06930160522460938, -0.06788253784179688, 0.08294677734375, -0.0184326171875, 0.290802001953125, 0.4260749816894531, 0.30849647521972656, 0.276275634765625, 0.7709884643554688, 0.32049560546875, 0.0319366455078125, 1.05975341796875, -0.8624706268310547, -0.457183837890625, 0.4235076904296875, 0.04728126525878906, -0.21080780029296875, 0.260467529296875, -0.2704124450683594, 0.78326416015625, -0.33222198486328125, 0.0670928955078125, -0.08090972900390625, 0.19070816040039062, 1.1052474975585938, 0.3139457702636719, 0.37674713134765625, 1.1242523193359375, 0.1585235595703125, -0.09964752197265625, 0.7913665771484375, 0.17182159423828125, 0.6412849426269531, 1.4829788208007812, 0.2382354736328125, -0.10492706298828125, 0.08147239685058594, 0.10477447509765625, 0.067138671875, 0.5737838745117188, 0.11968421936035156, 0.14768218994140625, -0.021257400512695312, 0.9729766845703125, -0.21118927001953125, -0.12823486328125, 0.9375152587890625, 0.594146728515625, 0.11484527587890625, 0.22129058837890625, -0.2523517608642578, 0.055538177490234375, -0.380584716796875, 0.094970703125, 0.30654144287109375, 0.4021263122558594, 0.06716156005859375, 1.110321044921875, -0.269775390625, 0.7171707153320312, 0.6270751953125, 0.06243896484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000022.npy"} +{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.22987452149391174, "std": 0.457994282245636, "min": -0.82183837890625, "p10": -0.2545526504516601, "median": 0.22069740295410156, "p90": 0.7746675491333009, "max": 1.8461761474609375, "pos_frac": 0.6875, "sample": [-0.199676513671875, 0.25476837158203125, 0.7856292724609375, 0.22777938842773438, 0.3228607177734375, 0.3737678527832031, 0.17775726318359375, 0.085906982421875, -0.18781471252441406, -0.4883880615234375, 1.32177734375, 1.5785980224609375, 0.6011962890625, -0.14633941650390625, 0.195220947265625, -0.025234222412109375, 0.4529876708984375, 0.28570556640625, -0.11773681640625, 0.2135601043701172, 0.28838348388671875, 0.2975311279296875, 0.947357177734375, -0.08436965942382812, -0.09763526916503906, -0.2900543212890625, 0.05196380615234375, -0.41394805908203125, 0.2342529296875, -0.1241607666015625, 0.3320655822753906, 0.4626617431640625, -0.10999298095703125, 0.6215114593505859, 0.1658306121826172, 0.0041351318359375, -0.1615753173828125, 0.13061904907226562, 0.2236042022705078, 0.35614585876464844, 0.3809471130371094, 0.1780548095703125, 0.22101211547851562, 0.8761825561523438, -0.2729759216308594, 1.8461761474609375, 0.0565338134765625, -0.82183837890625, 0.4021797180175781, -0.41632080078125, 0.26673316955566406, 0.7490901947021484, 0.10269927978515625, -0.3599739074707031, -0.06950759887695312, 0.5271987915039062, 0.2203826904296875, 0.7968292236328125, -0.2115650177001953, 0.3936920166015625, 0.39246368408203125, -0.0866546630859375, 0.48145294189453125, 0.5125255584716797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000023.npy"} +{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24034002423286438, "std": 0.44114452600479126, "min": -1.005126953125, "p10": -0.14784164428710936, "median": 0.2076549530029297, "p90": 0.75387077331543, "max": 1.446624755859375, "pos_frac": 0.75, "sample": [0.48496246337890625, 0.6820945739746094, 0.08336257934570312, 0.24327850341796875, 0.6244926452636719, 0.03178596496582031, 0.5233688354492188, -0.12078857421875, 0.79150390625, 0.250640869140625, 0.7924709320068359, -0.18362808227539062, 0.11735343933105469, 0.20606613159179688, 0.3367652893066406, 0.13002395629882812, -0.0743560791015625, 0.6824588775634766, 0.5692100524902344, 0.18076324462890625, -0.051326751708984375, -0.8686141967773438, -0.07432937622070312, 0.1497802734375, 0.77716064453125, 0.6995277404785156, -0.039325714111328125, -0.823333740234375, 0.3966522216796875, 0.17090606689453125, 1.237457275390625, 0.56158447265625, 0.4539337158203125, -0.5201034545898438, 0.3914527893066406, -0.13226318359375, 0.2092437744140625, 0.0998992919921875, -0.15451812744140625, 0.38855743408203125, 0.859893798828125, 0.31603240966796875, -0.0674285888671875, 0.34717559814453125, 0.0768280029296875, 1.446624755859375, 0.85498046875, 0.0300445556640625, 0.69683837890625, 0.13472938537597656, 0.18185806274414062, 0.4364299774169922, 0.002155303955078125, 0.06342887878417969, 0.3604583740234375, -0.0074462890625, 0.4690971374511719, -0.002044677734375, 0.4126167297363281, -0.5593109130859375, 0.33188629150390625, -1.005126953125, 0.1757049560546875, 0.6021652221679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000024.npy"} +{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.22478067874908447, "std": 0.4554341733455658, "min": -0.8603401184082031, "p10": -0.2547479629516601, "median": 0.24921417236328125, "p90": 0.7023128509521486, "max": 1.6734085083007812, "pos_frac": 0.671875, "sample": [0.3525886535644531, 0.3723258972167969, 0.535980224609375, 0.5105667114257812, 0.08325958251953125, -0.18616294860839844, -0.1772308349609375, 0.9092178344726562, 0.553253173828125, 0.10045242309570312, -0.09876251220703125, 0.48663330078125, -0.09818077087402344, 0.32140350341796875, 1.6734085083007812, -0.8603401184082031, -0.46956443786621094, 0.513397216796875, -0.13840103149414062, 0.7165412902832031, -0.5850067138671875, 0.19156646728515625, 0.3676795959472656, -0.09759521484375, -0.08834075927734375, 0.5571823120117188, -0.05887603759765625, 0.5774307250976562, -0.28414154052734375, -0.07079124450683594, -0.13463592529296875, 0.00801849365234375, -0.16278839111328125, 0.38922119140625, 0.04811859130859375, 0.5956573486328125, -0.015108108520507812, 1.2146148681640625, 0.34979248046875, 0.11365509033203125, 0.012996673583984375, 0.5067291259765625, 0.9080734252929688, 0.4977836608886719, 0.2793083190917969, 0.5333480834960938, 0.2547607421875, 0.209503173828125, 0.6691131591796875, 0.07845687866210938, 0.541351318359375, 0.1640777587890625, -0.36009979248046875, -0.617340087890625, 0.7575149536132812, -0.030626296997070312, -0.8153457641601562, 0.9814643859863281, 0.35024261474609375, 0.5904960632324219, 0.4701080322265625, 0.2436676025390625, -0.13561248779296875, 0.2799530029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000025.npy"} +{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.41762077808380127, "std": 0.5279146432876587, "min": -0.48786163330078125, "p10": -0.17408313751220703, "median": 0.31130504608154297, "p90": 1.093994140625, "max": 1.984405517578125, "pos_frac": 0.765625, "sample": [0.5872802734375, 0.19060516357421875, 0.38202667236328125, 0.12454986572265625, 1.228515625, 0.586639404296875, -0.251190185546875, 0.7493247985839844, -0.48786163330078125, -0.09865188598632812, 1.984405517578125, 0.7229232788085938, 0.984283447265625, 0.13309478759765625, -0.18688201904296875, 0.79241943359375, 0.6906814575195312, 0.531524658203125, 0.877899169921875, -0.18140792846679688, 0.3133678436279297, -0.026338577270507812, 0.25078582763671875, 0.17431259155273438, 0.5818405151367188, 1.0086135864257812, 0.1636505126953125, 0.277435302734375, 0.250091552734375, 1.0969467163085938, 0.05860137939453125, -0.46954345703125, 0.6144237518310547, 0.1744518280029297, -0.09722518920898438, 0.5162200927734375, 1.399261474609375, -0.14942169189453125, -0.05709075927734375, 1.5671768188476562, 0.3897705078125, -0.1045684814453125, 0.7231597900390625, 0.5313491821289062, 0.3845672607421875, 1.21246337890625, -0.4621467590332031, 0.5601959228515625, 0.12050247192382812, -0.15699195861816406, -0.04840087890625, 0.30924224853515625, -0.3317852020263672, 0.6573486328125, 0.0018768310546875, 0.8720703125, 1.0871047973632812, 1.0695266723632812, 1.4944381713867188, 0.3006134033203125, 0.0748443603515625, 0.039051055908203125, 0.7057533264160156, 0.2900047302246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000026.npy"} +{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5043210983276367, "std": 0.5854207277297974, "min": -0.5514678955078125, "p10": -0.1569681167602539, "median": 0.38453102111816406, "p90": 1.3579463958740234, "max": 2.6791152954101562, "pos_frac": 0.84375, "sample": [0.15465927124023438, 0.7363395690917969, 0.6839447021484375, 0.20296096801757812, 0.6053314208984375, 1.7024154663085938, 0.3682861328125, 0.31744384765625, 1.3669357299804688, -0.23567771911621094, 1.050933837890625, 0.2599639892578125, 1.0306167602539062, 0.15649795532226562, 0.8561553955078125, 0.7423858642578125, 2.6791152954101562, -0.1093902587890625, 0.2650299072265625, 0.26490020751953125, 0.05731201171875, -0.5514678955078125, 0.8966064453125, 0.11520004272460938, 0.09000015258789062, 0.3762187957763672, 0.0004100799560546875, 0.030466079711914062, 0.2822608947753906, 1.2608108520507812, 0.49141693115234375, -0.17696189880371094, -0.3231048583984375, 0.84368896484375, 0.49556732177734375, 0.30799102783203125, 0.3853263854980469, 0.5210342407226562, 0.419342041015625, 0.16340255737304688, -0.15938568115234375, 0.5931167602539062, -0.1771087646484375, 0.47965240478515625, 0.68035888671875, 1.4501800537109375, -0.35350799560546875, 0.38373565673828125, 1.3369712829589844, 0.9259490966796875, 1.420806884765625, 0.04006195068359375, 0.35587310791015625, 0.6585693359375, 1.487640380859375, 0.516021728515625, 1.8133544921875, -0.15132713317871094, 0.5938148498535156, -0.0525970458984375, 0.271148681640625, 0.7860260009765625, 0.04451751708984375, 0.548309326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000027.npy"} +{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3272559344768524, "std": 0.6033403873443604, "min": -1.71392822265625, "p10": -0.24843025207519526, "median": 0.3050556182861328, "p90": 0.9826868057250978, "max": 1.819366455078125, "pos_frac": 0.765625, "sample": [0.9220809936523438, 0.3735198974609375, 0.1002349853515625, 0.17819786071777344, 0.9934597015380859, 0.19061279296875, -0.10130691528320312, 0.0528106689453125, -0.19431686401367188, 0.018625259399414062, -0.08666229248046875, 0.3130340576171875, 1.7617645263671875, 0.5123443603515625, 0.8228759765625, -0.00457000732421875, 0.057952880859375, 0.616729736328125, 0.1482391357421875, 0.6735458374023438, 0.24190521240234375, -0.3449249267578125, -0.2716217041015625, 0.957550048828125, -0.623687744140625, 0.2631683349609375, 0.2796630859375, 0.756134033203125, 1.2381973266601562, 0.6581306457519531, 0.6060028076171875, 0.343780517578125, 0.23841476440429688, -0.009185791015625, 0.6288719177246094, 0.5606441497802734, 0.7598686218261719, 0.30052947998046875, 0.4422798156738281, -0.1202545166015625, 0.40807342529296875, 0.28203582763671875, -0.024707794189453125, 1.092987060546875, 1.819366455078125, 0.2257080078125, 0.431427001953125, 0.06739997863769531, 1.4967155456542969, 0.5025100708007812, 0.7260684967041016, -1.4587783813476562, 0.0257568359375, 0.4326019287109375, -0.4673728942871094, 0.3095817565917969, 0.44582366943359375, 1.4161529541015625, 0.6877288818359375, -0.09466743469238281, 0.23150634765625, -1.71392822265625, -0.48394775390625, 0.3316993713378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000028.npy"} +{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5928229689598083, "std": 0.6201984286308289, "min": -0.9011459350585938, "p10": -0.10267028808593745, "median": 0.6038303375244141, "p90": 1.2430007934570313, "max": 2.7155914306640625, "pos_frac": 0.84375, "sample": [0.8436775207519531, -0.12082862854003906, 0.1042022705078125, 0.2373809814453125, 0.44598388671875, 0.13167953491210938, -0.0243072509765625, 1.0708541870117188, 0.7534217834472656, 0.25681304931640625, 0.205474853515625, -0.13608741760253906, 1.0427932739257812, 1.1438369750976562, 0.30185699462890625, 1.0629119873046875, 1.1212310791015625, -0.052448272705078125, -0.06030082702636719, 1.34393310546875, 0.21776580810546875, 0.8535995483398438, 0.7805709838867188, 0.23193359375, 1.2335052490234375, 0.6140213012695312, 0.09050750732421875, 0.20689773559570312, 1.102783203125, 0.371856689453125, -0.17493629455566406, 1.6555633544921875, 2.7155914306640625, -0.5545654296875, 0.9718475341796875, 0.21099090576171875, 0.18941497802734375, 0.08547782897949219, 0.8449783325195312, 1.2470703125, 1.207061767578125, 0.72894287109375, 0.6401290893554688, 0.6151351928710938, 0.3916587829589844, -0.14516067504882812, 0.902801513671875, -0.33188629150390625, 0.9161148071289062, 0.194976806640625, 0.10211944580078125, 0.1439800262451172, 1.191680908203125, 0.31508636474609375, 1.1469078063964844, 1.819091796875, -0.9011459350585938, 1.093109130859375, 0.9933815002441406, 0.5936393737792969, 1.38629150390625, 0.9810562133789062, 0.00502777099609375, 1.3837165832519531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000029.npy"} +{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6878979206085205, "std": 0.6341769695281982, "min": -0.8761062622070312, "p10": -0.10558776855468748, "median": 0.60247802734375, "p90": 1.4384971618652345, "max": 2.423919677734375, "pos_frac": 0.84375, "sample": [1.1373367309570312, 0.01265716552734375, -0.04175758361816406, 0.42163848876953125, 1.4443359375, -0.8761062622070312, 0.41776275634765625, 0.2021026611328125, -0.18981552124023438, 0.78302001953125, 1.237548828125, 0.889129638671875, 0.5395851135253906, 1.0718650817871094, 0.416900634765625, 0.9244308471679688, 0.47220611572265625, 1.347259521484375, 0.3041229248046875, 1.1909637451171875, 0.8945846557617188, 0.639892578125, 2.423919677734375, 0.8398857116699219, 1.1721267700195312, 0.27065467834472656, 0.92596435546875, 1.1427688598632812, -0.11492156982421875, 0.4433116912841797, -0.13700103759765625, 0.15208816528320312, 0.85565185546875, -0.16875267028808594, 0.5203266143798828, 0.6488571166992188, 1.8267669677734375, 2.3553009033203125, 1.031982421875, 0.7421951293945312, -0.010843276977539062, 1.1269683837890625, 0.45809173583984375, 0.5650634765625, -0.176422119140625, 1.9651641845703125, 1.0155181884765625, 0.3722858428955078, 0.9698143005371094, 1.0364456176757812, 1.56890869140625, 1.4248733520507812, 0.3137359619140625, -0.27877044677734375, -0.08380889892578125, 0.9131011962890625, 0.4574165344238281, 0.44028472900390625, 0.7144241333007812, 0.033123016357421875, 0.4851531982421875, 1.7348175048828125, 0.3161773681640625, 0.49315643310546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000030.npy"} +{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.4500678479671478, "std": 0.6682660579681396, "min": -0.5946807861328125, "p10": -0.3277252197265625, "median": 0.3796844482421875, "p90": 1.2425949096679694, "max": 2.9293136596679688, "pos_frac": 0.703125, "sample": [0.17922019958496094, 0.9642486572265625, -0.373016357421875, 0.5541229248046875, 1.933074951171875, 0.4388580322265625, -0.3123130798339844, -0.5761642456054688, 0.8582611083984375, -0.5422821044921875, 2.9293136596679688, -0.07177734375, -0.047576904296875, 0.3858489990234375, 0.18045425415039062, 0.30556297302246094, 1.0612258911132812, 0.28223228454589844, 2.1464157104492188, 0.7805519104003906, -0.0917510986328125, -0.13671493530273438, 0.17038726806640625, 0.1144256591796875, 0.4411735534667969, 0.2729167938232422, 0.8315773010253906, 0.2875175476074219, 0.5094432830810547, -0.16466140747070312, 0.852020263671875, 0.8104248046875, 1.6673126220703125, -0.0097808837890625, 0.45926475524902344, -0.344482421875, 0.23529815673828125, 0.9440383911132812, -0.330352783203125, 0.5039215087890625, 1.070831298828125, 0.8904647827148438, -0.23785781860351562, 0.3651103973388672, -0.3251190185546875, -0.5946807861328125, 0.6250076293945312, 0.627899169921875, -0.17551422119140625, -0.03717041015625, 0.2910804748535156, 1.3162078857421875, 1.3505401611328125, -0.3288421630859375, -0.144683837890625, 0.6486587524414062, 0.25946044921875, 0.3735198974609375, 0.7632026672363281, 0.43221282958984375, 0.795440673828125, 1.4430923461914062, 0.9047775268554688, 0.39246368408203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000031.npy"} +{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7446720600128174, "std": 0.9461292624473572, "min": -0.6966209411621094, "p10": -0.24613609313964835, "median": 0.5001411437988281, "p90": 2.269461059570313, "max": 4.2985382080078125, "pos_frac": 0.828125, "sample": [0.17803955078125, 0.3624305725097656, 0.627349853515625, 1.1696319580078125, 0.8597030639648438, 0.7046051025390625, 0.1541900634765625, 0.502685546875, 0.3002796173095703, 0.4177703857421875, 1.1039962768554688, 0.6780853271484375, -0.4565391540527344, -0.06310462951660156, 0.08642578125, 1.1566390991210938, -0.3619270324707031, 0.9800872802734375, -0.12134933471679688, 2.370086669921875, 1.4594573974609375, 1.4775009155273438, 2.5577545166015625, -0.550750732421875, 0.8525047302246094, 0.5722198486328125, 0.298553466796875, 0.101898193359375, -0.6966209411621094, 0.9044952392578125, 0.33568572998046875, 0.1282672882080078, 0.3458595275878906, 3.1822052001953125, 1.2992172241210938, 0.28808021545410156, 1.240570068359375, 0.7751197814941406, -0.14366912841796875, 0.34966278076171875, 2.1685028076171875, 1.2198200225830078, -0.2900505065917969, 0.2886962890625, 4.2985382080078125, 0.9290542602539062, 0.396575927734375, 2.5006942749023438, 2.3127288818359375, -0.48062705993652344, 0.1827564239501953, 0.49759674072265625, 2.5672607421875, 1.0455398559570312, -0.6149559020996094, -0.05403900146484375, 0.8461494445800781, 0.991424560546875, 0.2878589630126953, 0.436187744140625, 0.10408401489257812, 0.6162948608398438, 1.5881805419921875, 0.3936424255371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000032.npy"} +{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6417955160140991, "std": 0.667909562587738, "min": -0.7734222412109375, "p10": -0.20271682739257804, "median": 0.6042671203613281, "p90": 1.5934906005859377, "max": 2.473480224609375, "pos_frac": 0.84375, "sample": [-0.264923095703125, 0.4782447814941406, 0.4182243347167969, 0.4909820556640625, 2.473480224609375, -0.07321548461914062, 0.4676971435546875, 1.625701904296875, 0.5564823150634766, 0.5656375885009766, 0.6801681518554688, 0.8240585327148438, 1.612030029296875, 0.8463058471679688, 1.0795745849609375, 0.23966598510742188, 0.7810821533203125, -0.7734222412109375, 0.6719970703125, -0.76220703125, -0.11254119873046875, 1.55023193359375, 1.206939697265625, 2.3340072631835938, 0.6942672729492188, 0.09200286865234375, 1.68798828125, 0.0196380615234375, 0.23436737060546875, 1.821044921875, 0.1550445556640625, 0.214019775390625, 2.2111968994140625, 0.1400909423828125, -0.264892578125, 0.6038894653320312, 0.18965911865234375, 0.5582199096679688, 0.9784126281738281, 0.6714630126953125, 0.6224594116210938, 1.0051116943359375, -0.241363525390625, 0.45526123046875, 0.2662544250488281, 1.0802764892578125, 0.6370620727539062, 1.04583740234375, 0.8228034973144531, -0.399261474609375, 0.26422882080078125, -0.2520332336425781, 0.8558692932128906, 0.5520286560058594, 0.604644775390625, 1.1482467651367188, 1.1772193908691406, -0.0239715576171875, 0.906524658203125, 0.2822456359863281, 0.8658218383789062, 0.46395111083984375, 0.7936496734619141, 1.2194328308105469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000033.npy"} +{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8307995796203613, "std": 0.8640182614326477, "min": -0.837158203125, "p10": -0.0704975128173828, "median": 0.5611610412597656, "p90": 1.9912551879882814, "max": 3.329620361328125, "pos_frac": 0.859375, "sample": [2.3612823486328125, 0.11446762084960938, 0.628265380859375, -0.051364898681640625, 2.6786346435546875, 0.4264392852783203, 1.95294189453125, 1.376068115234375, 0.3538970947265625, 0.4149169921875, 0.1855621337890625, -0.0976715087890625, 0.500701904296875, -0.07869720458984375, 2.0076751708984375, 2.237945556640625, 0.09705352783203125, -0.16437911987304688, 1.60369873046875, 0.38336944580078125, 1.2949371337890625, 0.08861923217773438, 1.152547836303711, 1.5628738403320312, 1.69342041015625, 1.8554306030273438, 1.6108245849609375, -0.30040740966796875, 0.1324920654296875, 1.2390060424804688, -0.25272369384765625, 0.7903976440429688, 0.726287841796875, 0.9918785095214844, 2.515899658203125, 0.8905067443847656, 1.2867469787597656, 3.329620361328125, 0.6281719207763672, -0.04912567138671875, 0.3182411193847656, 0.3659706115722656, 1.1075248718261719, 1.7475128173828125, 0.5336151123046875, 0.8096427917480469, 0.1361827850341797, 0.19451522827148438, 2.193817138671875, 0.21805191040039062, 0.14590072631835938, -0.837158203125, 0.43906402587890625, 0.8818206787109375, 0.2673187255859375, -0.29332542419433594, 1.789794921875, 0.3659324645996094, 0.3306121826171875, 0.5887069702148438, 1.636505126953125, 1.81689453125, 0.09874153137207031, 0.19707870483398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000034.npy"} +{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.2235569953918457, "std": 1.1256182193756104, "min": -0.5068817138671875, "p10": 0.12170696258544927, "median": 1.0032234191894531, "p90": 2.3609146118164066, "max": 7.0582275390625, "pos_frac": 0.921875, "sample": [1.1066818237304688, 1.1678276062011719, 2.203216552734375, 1.3929824829101562, -0.129730224609375, 0.68206787109375, 1.391510009765625, -0.05718231201171875, 0.09432601928710938, 1.235107421875, 1.56158447265625, 0.26213645935058594, 1.3677825927734375, 0.87774658203125, 0.42236328125, 0.478973388671875, 1.7668495178222656, 3.1750869750976562, 0.9655303955078125, 0.3497161865234375, 0.9732818603515625, 2.1713104248046875, 2.980682373046875, 0.1839447021484375, 0.46503448486328125, 0.5214385986328125, 0.49405670166015625, 1.042205810546875, 0.7721710205078125, 0.927642822265625, 7.0582275390625, 1.4315376281738281, 1.141845703125, 1.3273468017578125, 0.32256317138671875, 3.4044952392578125, 0.2558174133300781, 1.670379638671875, 2.6223907470703125, 2.0303573608398438, 0.6459274291992188, 2.2983551025390625, 0.9623641967773438, 1.715179443359375, 0.7875862121582031, -0.1348724365234375, 1.87347412109375, 2.387725830078125, 0.8064956665039062, 1.0331649780273438, 0.9004096984863281, -0.0718994140625, 0.5075359344482422, 2.5883026123046875, 1.0710067749023438, 0.9010677337646484, 1.5941925048828125, 0.09503364562988281, 1.6204910278320312, -0.5068817138671875, 0.8351898193359375, 2.0814208984375, 2.0108795166015625, 0.19419479370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000035.npy"} +{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1225972175598145, "std": 1.2831287384033203, "min": -2.5635452270507812, "p10": -0.19100265502929686, "median": 0.9129085540771484, "p90": 3.0314445495605487, "max": 4.4469451904296875, "pos_frac": 0.796875, "sample": [1.2569961547851562, -0.11786460876464844, 0.8154773712158203, 1.5208740234375, 1.0682754516601562, 0.7111568450927734, 0.76800537109375, -0.42413330078125, -0.2462291717529297, 2.4127044677734375, 1.6421432495117188, 0.012481689453125, -0.16983795166015625, 0.46602439880371094, 0.04006767272949219, 1.7536392211914062, 0.18059539794921875, 1.7588043212890625, 1.646575927734375, 0.36441802978515625, -0.2000732421875, -0.11122322082519531, -0.1374969482421875, 0.14800453186035156, 3.649566650390625, 4.168247222900391, 1.5711746215820312, -0.6579437255859375, 3.3520050048828125, -0.02704620361328125, 1.006357192993164, 1.368438720703125, -0.38474273681640625, 2.5649185180664062, -0.0526885986328125, 0.2291889190673828, 3.23138427734375, 0.6472320556640625, 2.2573165893554688, 0.9276008605957031, 2.0814590454101562, 0.9408454895019531, 2.1279373168945312, 0.7689628601074219, 3.3235626220703125, 1.1227493286132812, 0.7644920349121094, 4.4469451904296875, 0.8420314788818359, -2.5635452270507812, 2.471668243408203, 1.2890090942382812, 1.8012161254882812, 0.7692070007324219, 2.3022842407226562, 1.1470508575439453, -0.29058837890625, 0.3914966583251953, 1.930755615234375, 0.6044349670410156, 1.6589279174804688, 3.6361541748046875, 0.8982162475585938, 0.3705482482910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000036.npy"} +{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0293034315109253, "std": 1.4328175783157349, "min": -2.095733642578125, "p10": -0.28441429138183594, "median": 0.6779975891113281, "p90": 2.705603027343751, "max": 7.14788818359375, "pos_frac": 0.796875, "sample": [1.4312515258789062, -0.009307861328125, 1.1065750122070312, 0.8648452758789062, 0.42024993896484375, -0.38726806640625, -0.28226470947265625, 0.50823974609375, 1.1799087524414062, -0.3335762023925781, 0.4241180419921875, 2.98077392578125, 0.3512535095214844, -0.8544921875, -0.1567535400390625, 0.0325927734375, 0.6114902496337891, 0.20654678344726562, 2.031169891357422, 7.14788818359375, 1.2593917846679688, 4.5882110595703125, 1.1909599304199219, 0.9761543273925781, 0.9228363037109375, 1.181365966796875, 0.5762481689453125, -0.043304443359375, 1.5014572143554688, 1.410064697265625, 0.6269760131835938, 0.20096588134765625, 0.3786125183105469, -0.8174362182617188, 0.8335800170898438, 0.21927642822265625, 0.29308319091796875, 2.503997802734375, 2.2884292602539062, 3.259063720703125, -0.2853355407714844, 2.7747344970703125, 0.7290191650390625, 1.9915390014648438, -0.1478424072265625, 0.7538108825683594, 0.10913658142089844, -0.1511993408203125, 0.7641754150390625, 1.5205307006835938, 2.2919387817382812, 0.1253509521484375, 2.5442962646484375, 2.318023681640625, 0.33307456970214844, 3.0209579467773438, 2.1486854553222656, -2.095733642578125, 0.3274993896484375, -0.4753265380859375, 2.4419212341308594, 3.7165069580078125, 0.2333831787109375, 0.2630958557128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000037.npy"} +{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3506267070770264, "std": 1.5932660102844238, "min": -1.151092529296875, "p10": -0.06021270751953124, "median": 0.9063968658447266, "p90": 3.2411178588867195, "max": 7.55230712890625, "pos_frac": 0.859375, "sample": [0.139739990234375, 7.55230712890625, 3.9508056640625, 1.9984397888183594, 0.4754791259765625, 0.1420421600341797, 1.8151321411132812, -0.0876922607421875, 0.8700294494628906, 1.313507080078125, 1.691192626953125, 0.9409065246582031, 0.3334808349609375, 2.4189376831054688, 1.2813796997070312, -0.1633148193359375, 0.768768310546875, 0.7583541870117188, -0.04067420959472656, 2.7608871459960938, 0.9843978881835938, 1.2880401611328125, 2.9663429260253906, 0.12406730651855469, 3.4381103515625, 0.18255233764648438, 1.9091796875, 2.3646469116210938, 1.5941276550292969, 0.5821075439453125, 0.6740283966064453, 3.0061111450195312, -1.151092529296875, 0.9569320678710938, 0.4416522979736328, 0.444427490234375, 0.87188720703125, 6.3655242919921875, 0.49615478515625, 0.2643585205078125, 3.3418350219726562, 4.874664306640625, 1.7588653564453125, 0.6369400024414062, 0.7201042175292969, -0.08660125732421875, -0.064483642578125, 0.10150146484375, 0.8176116943359375, 1.3224639892578125, -0.5103225708007812, 0.13885498046875, -0.0502471923828125, 1.3688163757324219, 1.0336017608642578, 1.811004638671875, 1.5207366943359375, 5.152427673339844, 1.8731231689453125, 0.33002471923828125, 1.5546646118164062, -0.5812110900878906, 0.7878875732421875, 1.8645782470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000038.npy"} +{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6912683248519897, "std": 1.4913898706436157, "min": -2.484710693359375, "p10": 0.19280166625976564, "median": 1.3570976257324219, "p90": 3.8250688552856453, "max": 5.27679443359375, "pos_frac": 0.90625, "sample": [2.2682876586914062, 2.2466583251953125, 4.9700775146484375, 1.69842529296875, 1.6622314453125, -0.5492324829101562, 2.9535369873046875, 2.629364013671875, 1.1886005401611328, 2.2709484100341797, 0.8509063720703125, 3.2951583862304688, 1.677947998046875, 1.0758304595947266, 4.084617614746094, 0.8679122924804688, 0.6129913330078125, 0.7067947387695312, -0.41583251953125, 1.1036148071289062, 0.8682403564453125, 2.7929611206054688, 2.680908203125, 0.40004539489746094, 1.249908447265625, 3.0543136596679688, -0.220184326171875, 1.7199211120605469, 4.707000732421875, 0.32329559326171875, 0.18325042724609375, 0.5079097747802734, 2.8665695190429688, 2.509124755859375, -2.484710693359375, 2.1792373657226562, 0.33837318420410156, 1.9540328979492188, 3.6431140899658203, 5.27679443359375, 1.0738983154296875, 2.5807342529296875, 4.399559020996094, 1.19000244140625, 1.27642822265625, 0.5380134582519531, -0.4913177490234375, 0.5174407958984375, 1.4377670288085938, 0.7341709136962891, 1.08746337890625, 3.9030494689941406, 2.8978424072265625, 2.9286041259765625, 0.5480899810791016, 4.007354736328125, 3.31573486328125, 1.0627632141113281, 1.5650863647460938, 0.215087890625, 0.641082763671875, -0.014036178588867188, 0.7852954864501953, 2.2921104431152344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000039.npy"} +{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5692869424819946, "std": 1.7425506114959717, "min": -1.0187225341796875, "p10": -0.09466171264648438, "median": 1.1390457153320312, "p90": 3.915401077270508, "max": 7.773681640625, "pos_frac": 0.84375, "sample": [-0.46543121337890625, 1.0447616577148438, 0.6491241455078125, 1.3052635192871094, 1.1007728576660156, 0.5481796264648438, 0.575592041015625, 0.2881317138671875, 0.2953052520751953, 2.243213653564453, 7.064056396484375, 3.087127685546875, 1.1957836151123047, 3.3749160766601562, 1.4351119995117188, -0.4079399108886719, 1.9679718017578125, 2.2079925537109375, 0.007993698120117188, -1.0187225341796875, 2.62908935546875, -0.09271240234375, 0.717315673828125, -0.05035400390625, 2.1411819458007812, 1.6100921630859375, 0.2567253112792969, 0.8807964324951172, 0.5276412963867188, 0.2034759521484375, 4.525459289550781, -0.2376995086669922, 3.8830184936523438, 0.9102020263671875, 0.4029560089111328, 4.623443603515625, -0.000728607177734375, 3.929279327392578, 0.6964912414550781, 1.1773185729980469, 5.0361480712890625, 0.9401817321777344, 0.9704818725585938, 7.773681640625, -0.47878265380859375, 1.5689773559570312, -0.09549713134765625, 1.9097137451171875, 2.5505828857421875, 2.2818222045898438, 2.07928466796875, 2.575714111328125, 3.7797164916992188, 1.7250308990478516, 0.9728775024414062, 1.2478752136230469, 4.2503814697265625, -0.353271484375, 1.6648178100585938, 0.04396820068359375, 0.14272308349609375, 1.6831741333007812, 2.3652572631835938, 0.5673103332519531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000040.npy"} +{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.867814540863037, "std": 2.1468727588653564, "min": -2.99066162109375, "p10": 0.0148834228515628, "median": 1.2330875396728516, "p90": 4.230191040039062, "max": 9.173782348632812, "pos_frac": 0.890625, "sample": [1.0165901184082031, 1.2293891906738281, 3.4459075927734375, 1.6227874755859375, 0.6588516235351562, 2.1923599243164062, 3.467742919921875, 2.451946258544922, -0.7078704833984375, 3.1918106079101562, 6.77593994140625, 0.7316360473632812, 2.163330078125, 9.173782348632812, 0.3140850067138672, 0.53082275390625, 0.9668731689453125, 0.9459152221679688, 0.35729217529296875, -2.0039825439453125, 1.551605224609375, 3.6680030822753906, 4.155364990234375, 0.9745750427246094, 7.27728271484375, 1.1190223693847656, 4.2332763671875, 5.0833740234375, 0.5574417114257812, 0.3927574157714844, 2.0679779052734375, 0.423065185546875, 7.24444580078125, 0.8419647216796875, 0.8876724243164062, 3.5757827758789062, 1.236785888671875, 3.0764617919921875, -0.1128692626953125, 2.6625404357910156, 4.222991943359375, 1.9717578887939453, -2.99066162109375, 3.5142669677734375, 0.36981964111328125, 0.3129730224609375, 2.187255859375, 2.2740821838378906, 2.1054916381835938, 0.5083484649658203, 1.0560455322265625, 0.8237380981445312, 0.87933349609375, 0.6257553100585938, 0.7899494171142578, 1.4399585723876953, -0.5140304565429688, 3.5303421020507812, 1.2666168212890625, 0.3843345642089844, -0.6335887908935547, -0.38869476318359375, 1.6092948913574219, 4.7530059814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000041.npy"} +{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.601499319076538, "std": 2.482137680053711, "min": -1.5631637573242188, "p10": 0.058539962768554825, "median": 2.057371139526367, "p90": 6.06165008544922, "max": 11.2684326171875, "pos_frac": 0.90625, "sample": [1.9669952392578125, -0.00115203857421875, 1.7317523956298828, 10.653854370117188, 11.2684326171875, 2.5374755859375, 0.37439727783203125, 5.76739501953125, 2.337158203125, 3.0301742553710938, 1.8641357421875, 6.225105285644531, 2.8072967529296875, 7.89630126953125, 0.8184986114501953, 1.442626953125, 2.7356185913085938, -0.02478790283203125, 2.8195877075195312, 3.1453781127929688, 1.3118820190429688, 1.2561054229736328, 4.304420471191406, 0.28687286376953125, 0.9688262939453125, 3.2270278930664062, -0.207489013671875, 2.496551513671875, 2.087200164794922, 0.0004119873046875, 2.5218162536621094, 5.693450927734375, 0.4211750030517578, 6.57672119140625, 2.5728988647460938, 1.6790771484375, 0.6576709747314453, 1.343667984008789, 2.0275421142578125, -0.7807388305664062, 4.0256195068359375, -0.3193817138671875, 1.61328125, 6.1877593994140625, 1.7148666381835938, 4.3771514892578125, 1.2054615020751953, 0.35773468017578125, 1.2013053894042969, 1.7784423828125, 0.8672676086425781, 6.2259521484375, -1.5631637573242188, 0.5750160217285156, 3.0130233764648438, 3.8147430419921875, 0.19417190551757812, 2.322845458984375, 4.8377532958984375, 2.5758819580078125, 3.9627227783203125, 5.01129150390625, 0.9318904876708984, 3.74298095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000042.npy"} +{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.4315857887268066, "std": 1.9878431558609009, "min": -0.9192047119140625, "p10": 0.3126018524169922, "median": 2.2747802734375, "p90": 4.6076416015625, "max": 10.165512084960938, "pos_frac": 0.953125, "sample": [0.3265419006347656, 0.3136405944824219, 0.20690155029296875, 3.4606399536132812, 3.878173828125, 0.3504638671875, 4.395374298095703, 2.6136322021484375, 2.916301727294922, 6.545562744140625, 0.6748199462890625, 1.9992637634277344, 0.7799797058105469, 2.179962158203125, 2.826976776123047, 0.7228164672851562, 3.50439453125, 5.715736389160156, 1.9869003295898438, 0.3348522186279297, 6.4254150390625, 2.9957275390625, 5.494415283203125, 0.96392822265625, 2.9113845825195312, 2.546783447265625, 0.002582550048828125, 2.4062957763671875, -0.0057277679443359375, 4.6342010498046875, 1.5045547485351562, 2.633005142211914, 3.7610015869140625, 2.732025146484375, 2.0943527221679688, 3.826770782470703, 0.31215667724609375, 2.369598388671875, 1.8466911315917969, 0.31664276123046875, 1.7584075927734375, 1.835693359375, 6.816322326660156, 3.75164794921875, 0.8848953247070312, -0.9192047119140625, 1.0814075469970703, 2.87078857421875, 2.1641082763671875, 2.81304931640625, 2.4779396057128906, 3.4890518188476562, 2.1457901000976562, 0.5422458648681641, 1.9230804443359375, 10.165512084960938, 3.875946044921875, 1.3415031433105469, -0.18816757202148438, 0.5926780700683594, 2.5937728881835938, 4.5456695556640625, 3.2963027954101562, 0.25830078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000043.npy"} +{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.852534294128418, "std": 2.277489423751831, "min": -0.7397136688232422, "p10": 0.5274934768676758, "median": 2.4478912353515625, "p90": 5.83011474609375, "max": 10.12384033203125, "pos_frac": 0.953125, "sample": [4.01739501953125, 2.66168212890625, 1.6634235382080078, 0.5929775238037109, 7.108211517333984, 2.6771011352539062, 3.4503326416015625, 1.5716552734375, 2.3618392944335938, 5.6745758056640625, 6.6666717529296875, 0.13580322265625, 4.553436279296875, 5.3524627685546875, 5.402008056640625, 3.5977096557617188, 0.7060127258300781, 2.470550537109375, 2.3220996856689453, 5.6399383544921875, 0.5243206024169922, 3.2138671875, 2.5918216705322266, 7.81640625, 2.077484130859375, 3.21636962890625, 0.587158203125, 1.4232330322265625, 5.8967742919921875, 1.5241336822509766, 3.3856124877929688, -0.18680381774902344, 1.7420539855957031, 1.1157150268554688, 1.4707927703857422, 3.3350830078125, 0.8595733642578125, 1.667633056640625, 5.62579345703125, 2.42523193359375, 0.5348968505859375, 2.1155548095703125, 3.7483062744140625, 3.9282913208007812, 1.4972801208496094, 3.2861328125, 2.8569488525390625, 2.489288330078125, 0.5390338897705078, 0.1945343017578125, 7.049224853515625, 4.408111572265625, 1.3841552734375, 3.6110992431640625, 1.3681411743164062, 10.12384033203125, 0.7006416320800781, -0.3931121826171875, 3.899139404296875, 1.6872673034667969, -0.7397136688232422, 0.13648223876953125, 1.2730712890625, 7.925453186035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000044.npy"} +{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.3724491596221924, "std": 2.6939775943756104, "min": -1.4713287353515625, "p10": -0.13217582702636715, "median": 1.758474349975586, "p90": 6.253539657592774, "max": 12.83599853515625, "pos_frac": 0.875, "sample": [3.58319091796875, 1.4715728759765625, -0.14655303955078125, 2.6266212463378906, 0.3130455017089844, 1.607248306274414, 4.152130126953125, 6.8252105712890625, 4.79595947265625, 3.5864410400390625, 0.6172637939453125, 8.017837524414062, 3.715005874633789, 0.4323310852050781, 0.0055179595947265625, 1.8329811096191406, 0.1092376708984375, 1.8689193725585938, 0.09159660339355469, 6.288578033447266, -0.10698699951171875, 0.37660789489746094, 2.9287261962890625, 0.6684379577636719, 0.6553153991699219, 5.788209915161133, 6.685188293457031, 4.4320526123046875, 0.2926158905029297, 0.450164794921875, 5.8739013671875, 2.865020751953125, 6.171783447265625, 0.2128753662109375, 0.0171661376953125, 1.7624969482421875, 6.389434814453125, 3.9784698486328125, -0.3838348388671875, 1.8008041381835938, 1.5559558868408203, 0.39972686767578125, -1.0993423461914062, -1.4713287353515625, 1.198089599609375, 12.83599853515625, 1.4975357055664062, 7.314613342285156, 3.4618682861328125, -1.029510498046875, 2.3369712829589844, 2.22344970703125, 1.7544517517089844, 0.45514869689941406, 1.26446533203125, 5.9888458251953125, 1.01239013671875, 2.443450927734375, 2.274463653564453, 2.2972869873046875, -0.14297103881835938, 3.4252548217773438, 0.5884552001953125, -1.4011077880859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000045.npy"} +{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.9789419174194336, "std": 3.2554383277893066, "min": -2.84442138671875, "p10": -0.022462081909179647, "median": 2.100412368774414, "p90": 7.0821464538574235, "max": 15.3477783203125, "pos_frac": 0.890625, "sample": [0.7752094268798828, 0.138397216796875, 5.169403076171875, 7.772430419921875, 3.8030853271484375, -2.4099044799804688, 1.357269287109375, 1.609201431274414, 7.247314453125, 3.489034652709961, 6.460109710693359, 1.659860610961914, 0.304779052734375, 2.2546768188476562, 4.920654296875, 0.1450958251953125, 0.06927490234375, 0.6449012756347656, 4.557075500488281, 12.336578369140625, -0.4546031951904297, 0.9861297607421875, -0.039340972900390625, 3.3837966918945312, 0.38254547119140625, 4.3345184326171875, 3.0732574462890625, 1.7240543365478516, 4.001091003417969, 2.2997798919677734, 4.6520233154296875, 0.4299468994140625, -2.84442138671875, 1.0653343200683594, 3.5968780517578125, 6.267127990722656, 5.495391845703125, -0.14467620849609375, 8.203399658203125, 0.3986358642578125, 0.555206298828125, 3.901092529296875, 5.470027923583984, -0.16904449462890625, 15.3477783203125, 0.0169219970703125, 6.085296630859375, 6.696754455566406, 0.7569789886474609, 2.7148208618164062, 1.9461479187011719, 8.4525146484375, 3.378023147583008, 0.06207275390625, 1.8302154541015625, 7.673713684082031, 1.2691917419433594, 2.5519752502441406, 3.2732467651367188, 5.463691711425781, 1.8579292297363281, 0.9894695281982422, 1.6287059783935547, -0.21576690673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000046.npy"} +{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.989128351211548, "std": 2.697270154953003, "min": -2.40863037109375, "p10": 0.13678665161132814, "median": 2.2779693603515625, "p90": 6.631605529785157, "max": 10.777679443359375, "pos_frac": 0.9375, "sample": [2.24169921875, 3.0643997192382812, 0.24224853515625, 1.4349327087402344, 4.798824310302734, 1.5402755737304688, 0.5742950439453125, 1.4529571533203125, 6.9172210693359375, 2.922046661376953, 0.14694976806640625, 6.493743896484375, 6.400489807128906, 1.1474838256835938, 3.498638153076172, 2.6165618896484375, 0.07608795166015625, 1.571319580078125, 2.6593475341796875, 5.308662414550781, 6.46966552734375, 1.1033554077148438, 0.9250068664550781, 3.360748291015625, 3.8559417724609375, 1.6660537719726562, -1.4724578857421875, 6.6906890869140625, 7.930908203125, 2.8697509765625, -0.2588691711425781, 3.4453887939453125, 6.186496734619141, 4.37396240234375, -2.40863037109375, 0.018007278442382812, 0.48497772216796875, 1.4743919372558594, 1.2667312622070312, 9.56488037109375, 6.2013397216796875, 2.169157028198242, 1.5433082580566406, 3.65582275390625, 9.738327026367188, 6.75177001953125, 0.5062713623046875, 3.212339401245117, 10.777679443359375, -0.17183685302734375, 1.67523193359375, 2.043182373046875, 0.1324310302734375, 1.6487751007080078, 2.314239501953125, 1.2444229125976562, 5.113487243652344, 1.58404541015625, 3.4272308349609375, 2.049530029296875, 4.549468994140625, 3.924224853515625, 2.200695037841797, 2.3578853607177734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000047.npy"} +{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 3.1348774433135986, "std": 3.2677438259124756, "min": -7.3095855712890625, "p10": 0.15635910034179698, "median": 2.338470458984375, "p90": 7.89001922607422, "max": 13.0052490234375, "pos_frac": 0.90625, "sample": [4.0988311767578125, 0.6247406005859375, 8.033523559570312, 8.482421875, 7.1812286376953125, 2.9989547729492188, -0.7362213134765625, 2.4246749877929688, 5.610984802246094, 4.664924621582031, 1.9001312255859375, 6.071044921875, 2.4748764038085938, 0.397186279296875, 0.11357879638671875, 1.5762710571289062, 7.55517578125, -0.11893463134765625, 0.6042098999023438, -0.060276031494140625, 0.5604438781738281, 8.696929931640625, -0.0504608154296875, 2.2156448364257812, 6.9674072265625, 1.8186264038085938, 3.7718887329101562, 3.4219932556152344, 0.47559356689453125, 3.6694679260253906, 1.7778968811035156, 1.73968505859375, 3.98455810546875, 1.9150543212890625, 9.883010864257812, 3.8197269439697266, 4.8498382568359375, 10.559226989746094, 3.6341552734375, 1.2785377502441406, 1.0196762084960938, 1.3317203521728516, 1.1964263916015625, 2.9636688232421875, 0.2561798095703125, 4.270198822021484, 3.3561763763427734, 1.8563156127929688, -2.172119140625, 8.40237045288086, 3.024667739868164, 3.5534210205078125, 1.857696533203125, 13.0052490234375, 4.662864685058594, -7.3095855712890625, 5.954196929931641, 1.7732009887695312, 0.6534805297851562, 0.8284759521484375, 5.4233551025390625, 1.6843032836914062, 1.9013938903808594, 2.2522659301757812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000048.npy"} +{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.8097660541534424, "std": 3.9657328128814697, "min": -3.8554840087890625, "p10": -0.020603561401367147, "median": 3.3138208389282227, "p90": 8.56391830444336, "max": 16.633575439453125, "pos_frac": 0.890625, "sample": [3.088245391845703, 0.6070213317871094, 0.4314002990722656, 3.0148391723632812, 3.0460853576660156, 5.750453948974609, 1.3576431274414062, 2.578632354736328, -1.8564300537109375, 0.7074050903320312, 0.2243061065673828, 2.2477245330810547, 3.1574172973632812, 9.661598205566406, 7.452972412109375, 3.5171966552734375, 4.184825897216797, 2.2560977935791016, 4.188819885253906, 1.3590469360351562, 2.7183189392089844, -3.1162776947021484, 3.8541717529296875, -0.03814506530761719, 14.397705078125, 10.263992309570312, 1.300323486328125, 5.7655487060546875, 3.5757904052734375, 0.6400547027587891, 8.188758850097656, 16.633575439453125, 4.7365875244140625, 1.1790924072265625, -0.99627685546875, -3.8554840087890625, 4.2640380859375, 3.781679153442383, 4.620292663574219, 4.858604431152344, 3.2798023223876953, 0.020326614379882812, -0.718536376953125, 4.656181335449219, 5.3984832763671875, 14.69317626953125, 2.528350830078125, 2.6305007934570312, 8.724700927734375, 7.2517242431640625, 6.9813079833984375, 3.34783935546875, 4.441993713378906, 5.3188323974609375, 12.95880126953125, 0.7202396392822266, -1.25335693359375, 3.6759109497070312, 1.8859176635742188, 3.9597091674804688, 5.4315948486328125, 1.6480140686035156, 4.907680511474609, 1.5881805419921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000049.npy"} +{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 4.163365364074707, "std": 4.100195407867432, "min": -3.2452163696289062, "p10": -0.5954093933105468, "median": 3.2005043029785156, "p90": 9.539332580566409, "max": 15.543975830078125, "pos_frac": 0.828125, "sample": [7.764556884765625, -0.0117645263671875, -2.35162353515625, 10.534835815429688, 1.3905677795410156, 3.0562591552734375, 8.004684448242188, 2.2074203491210938, 4.125370025634766, 1.8606500625610352, -0.9208221435546875, 7.5594482421875, 3.8653335571289062, 15.543975830078125, 0.3423919677734375, 2.453643798828125, 8.069351196289062, 3.2982635498046875, 9.8170166015625, 4.346435546875, 0.7199478149414062, 5.714630126953125, 10.304641723632812, 2.897216796875, 2.1210403442382812, 5.5832366943359375, -0.06096649169921875, -0.9180488586425781, 3.1027450561523438, 8.891403198242188, 6.8190765380859375, 5.540641784667969, 10.25747299194336, 15.367996215820312, -3.2452163696289062, 0.5849342346191406, 1.3140335083007812, 1.8939590454101562, 0.30876731872558594, 3.74755859375, 2.2046432495117188, 1.6460647583007812, 6.580528259277344, 4.6412811279296875, 7.977657318115234, 6.703239440917969, -0.5126571655273438, 1.9923439025878906, 5.5814361572265625, -0.6308746337890625, 6.053619384765625, 2.7764663696289062, 4.6439666748046875, 6.066307067871094, 2.6397247314453125, 6.785911560058594, 8.012046813964844, 2.1954803466796875, 13.145988464355469, -0.7833480834960938, 7.242420196533203, 1.6661300659179688, -1.941375732421875, -0.1327037811279297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000050.npy"} +{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 4.017845153808594, "std": 5.204706192016602, "min": -7.0954742431640625, "p10": -0.4073829650878906, "median": 2.8037109375, "p90": 10.65169372558594, "max": 20.295257568359375, "pos_frac": 0.828125, "sample": [9.928932189941406, 4.169303894042969, 0.8243083953857422, -0.9639854431152344, 0.76055908203125, 0.223846435546875, 9.613632202148438, -0.6189002990722656, 1.8452224731445312, -0.1319427490234375, 12.23846435546875, 7.573680877685547, 2.1122589111328125, 4.631034851074219, 2.3469600677490234, 5.044281005859375, 4.470844268798828, -1.2087554931640625, 10.961448669433594, 5.655216217041016, 4.145425796508789, -0.408905029296875, -0.03413963317871094, 3.45404052734375, 1.0870513916015625, 7.640483856201172, 0.7507095336914062, 5.995048522949219, 0.8511543273925781, 5.6573944091796875, 20.295257568359375, 3.0325698852539062, 0.4062461853027344, 5.4130096435546875, 3.2634124755859375, 15.432525634765625, 1.757537841796875, 15.356582641601562, 5.161293029785156, 2.1188182830810547, 2.804412841796875, 1.1265373229980469, 4.170642852783203, 9.211692810058594, 1.0135231018066406, 0.7321205139160156, 5.7174530029296875, -4.633941650390625, -0.27681922912597656, 19.167022705078125, 14.530197143554688, 7.69049072265625, 7.910335540771484, -0.40383148193359375, 1.5182609558105469, 6.471977233886719, 0.0061054229736328125, 0.28725433349609375, 2.411834716796875, -7.0954742431640625, 2.8879776000976562, -1.77227783203125, 2.803009033203125, 0.0116729736328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000051.npy"} +{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 6.196599006652832, "std": 5.214123725891113, "min": -2.4729690551757812, "p10": 1.103017807006836, "median": 4.75640869140625, "p90": 13.493393707275391, "max": 22.800430297851562, "pos_frac": 0.921875, "sample": [1.6666145324707031, 13.226455688476562, 4.645111083984375, 12.957672119140625, 7.5622406005859375, 10.993743896484375, 3.1026344299316406, 7.26751708984375, 7.688873291015625, 14.18267822265625, -0.38004302978515625, 13.607795715332031, 4.6814727783203125, 5.558067321777344, 9.485116958618164, 8.359121322631836, 3.063495635986328, 5.454904556274414, 18.136192321777344, 9.569480895996094, 3.7314109802246094, 1.9317474365234375, 13.194183349609375, 3.112506866455078, 0.16307830810546875, 7.941947937011719, 3.2969512939453125, 2.004636764526367, 9.066267013549805, 22.800430297851562, 9.465850830078125, 1.5529823303222656, 11.030197143554688, 4.5737762451171875, 4.356189727783203, 14.010101318359375, 4.117992401123047, -1.255950927734375, 4.8313446044921875, -2.3230514526367188, -2.4729690551757812, 2.356597900390625, 6.746679306030273, 5.900917053222656, 2.095684051513672, 9.269775390625, 4.251686096191406, 14.15557861328125, 7.321247100830078, 1.6957664489746094, 1.0676155090332031, 3.73291015625, 3.787464141845703, 5.433628082275391, 2.9558563232421875, 9.917217254638672, 6.301525115966797, 1.1856231689453125, 18.886131286621094, -1.3941497802734375, 6.4700164794921875, 4.414726257324219, 2.579439163208008, 1.4916210174560547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000052.npy"} +{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 6.325778007507324, "std": 6.903669357299805, "min": -3.0708999633789062, "p10": 0.523998260498047, "median": 4.254323959350586, "p90": 16.2981315612793, "max": 30.56524658203125, "pos_frac": 0.921875, "sample": [-1.9428482055664062, 4.151115417480469, 3.4553680419921875, 3.264801025390625, 1.0823631286621094, 3.7176342010498047, 13.732398986816406, 0.292694091796875, 8.293594360351562, -1.8850250244140625, 2.695526123046875, 29.661224365234375, -2.9641075134277344, 0.6371383666992188, 7.182159423828125, 7.773929595947266, 1.8636322021484375, 11.982040405273438, 3.0870437622070312, 12.725715637207031, 4.557991027832031, 6.1599273681640625, 13.625350952148438, 7.0848236083984375, 17.195030212402344, 1.6130905151367188, 5.008049011230469, 0.86798095703125, 5.729450225830078, 1.8971939086914062, 19.31365966796875, 9.107707977294922, 7.673175811767578, 8.754920959472656, 3.7675132751464844, 0.494232177734375, 0.5934524536132812, 4.244075775146484, 1.0302696228027344, 4.548887252807617, 4.2645721435546875, 8.029268264770508, 1.8793220520019531, 15.376426696777344, -3.0708999633789062, 17.398223876953125, 19.44085693359375, 3.2904052734375, 5.3011627197265625, 3.5430831909179688, 1.9848213195800781, 4.456943511962891, 2.698822021484375, 1.3422698974609375, 3.6613845825195312, 4.546802520751953, 3.0874576568603516, 30.56524658203125, 2.805889129638672, 16.323333740234375, 4.954368591308594, 5.1036376953125, 16.23932647705078, -0.4460926055908203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000053.npy"} +{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.348155975341797, "std": 5.101955890655518, "min": -1.217885971069336, "p10": 0.31595420837402344, "median": 4.3403520584106445, "p90": 11.947212982177735, "max": 22.118820190429688, "pos_frac": 0.9375, "sample": [8.4288330078125, 5.90570068359375, 8.705440521240234, 0.841827392578125, 0.7428035736083984, 3.294189453125, 0.3109016418457031, 2.7368030548095703, 6.350318908691406, 8.077495574951172, 0.9618701934814453, 6.3130035400390625, 12.673927307128906, 1.9949188232421875, 4.2920989990234375, 11.761909484863281, -0.04087257385253906, 5.216611862182617, 5.663694381713867, 5.5718994140625, -0.010646820068359375, 0.7324752807617188, 17.35938262939453, 0.3277435302734375, 7.993614196777344, 0.11254119873046875, 5.057926177978516, 4.6060333251953125, 6.2200927734375, 6.836570739746094, 1.7481555938720703, -0.3606376647949219, 8.341781616210938, 0.10908889770507812, 1.0015220642089844, 3.3203697204589844, 11.869117736816406, 2.8109054565429688, 5.2220916748046875, 13.555572509765625, 1.7007427215576172, 2.6218795776367188, 22.118820190429688, 10.7506103515625, -1.217885971069336, 3.2128849029541016, 0.3652381896972656, 13.889404296875, 0.7051410675048828, 4.54888916015625, 1.4121170043945312, 4.388605117797852, 11.570770263671875, 1.8563766479492188, 2.6375579833984375, 19.296661376953125, 1.0257987976074219, 5.384105682373047, 2.1015243530273438, 11.980682373046875, 2.5618972778320312, 10.412841796875, 1.2961406707763672, 11.004081726074219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000054.npy"} +{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.541542053222656, "std": 7.597686767578125, "min": -11.797103881835938, "p10": -1.5170097351074217, "median": 5.444938659667969, "p90": 16.815843200683595, "max": 28.58837890625, "pos_frac": 0.8125, "sample": [7.281639099121094, 10.504524230957031, 5.502403259277344, -5.346221923828125, 26.084747314453125, -2.1622848510742188, 8.087364196777344, 4.341133117675781, 17.565200805664062, -1.0333938598632812, 6.822357177734375, 6.330863952636719, 2.3822097778320312, 11.442985534667969, 20.837860107421875, 9.856155395507812, 5.327484130859375, 9.88970947265625, 28.58837890625, 3.5264129638671875, 11.62359619140625, -1.70001220703125, 4.755226135253906, 9.048210144042969, 24.932464599609375, 0.08758735656738281, 17.093948364257812, 0.10195159912109375, 0.7790966033935547, 8.85848617553711, 4.949333190917969, -1.898468017578125, 6.703174591064453, 0.05930137634277344, 13.02178955078125, 5.0479278564453125, -1.6079902648925781, -0.24721527099609375, 0.19149017333984375, -5.2381134033203125, 10.432605743408203, 3.8825740814208984, 2.1756134033203125, 15.829841613769531, 6.7635345458984375, -1.3047218322753906, 0.6938514709472656, 11.474334716796875, 5.315032958984375, -11.797103881835938, 3.175861358642578, 16.16693115234375, 7.1371002197265625, 4.191143035888672, 7.831695556640625, 5.847484588623047, -0.9326820373535156, 8.785873413085938, 7.852783203125, 14.299148559570312, 5.387474060058594, -0.160736083984375, 5.1598358154296875, 18.05988311767578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000055.npy"} +{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 6.195199012756348, "std": 7.477856636047363, "min": -10.192279815673828, "p10": -1.8515127182006832, "median": 5.262235641479492, "p90": 16.39516601562501, "max": 28.020172119140625, "pos_frac": 0.8125, "sample": [-2.94073486328125, 7.6698150634765625, -3.589263916015625, 8.600120544433594, -0.5028171539306641, -10.192279815673828, 11.347419738769531, -1.4351520538330078, 3.7860031127929688, -3.959278106689453, 3.6470947265625, 1.8753242492675781, 5.136863708496094, 17.395233154296875, 2.4052200317382812, 0.06356430053710938, -0.22560691833496094, 11.970802307128906, 10.635517120361328, 8.250892639160156, -2.0299530029296875, 11.769302368164062, 3.3399505615234375, 24.072906494140625, 9.2095947265625, 17.235382080078125, 1.1224498748779297, 0.21138763427734375, 9.87367057800293, 2.8678455352783203, 7.914276123046875, 7.667194366455078, 9.835807800292969, 1.6998538970947266, 11.368873596191406, 12.346988677978516, 0.20641708374023438, 19.853790283203125, 7.603790283203125, 6.515388488769531, 6.035297393798828, 5.21026611328125, 3.0262317657470703, 21.199371337890625, 7.68359375, 13.377632141113281, 6.410650253295898, 11.953414916992188, 0.4239959716796875, 28.020172119140625, 1.6940250396728516, -0.021638870239257812, 4.160865783691406, 14.434661865234375, -8.06903076171875, 3.307069778442383, -0.683319091796875, 21.918487548828125, 5.314205169677734, -3.6510467529296875, 3.2399673461914062, 2.1170578002929688, 6.3523101806640625, 10.414840698242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000056.npy"} +{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.902283668518066, "std": 6.7265801429748535, "min": -11.818206787109375, "p10": 0.10590305328369173, "median": 6.822113037109375, "p90": 15.08910140991211, "max": 23.783004760742188, "pos_frac": 0.890625, "sample": [8.943416595458984, 3.328969955444336, 0.5268974304199219, -3.0135498046875, 11.4873046875, -11.818206787109375, 11.997161865234375, 0.4315299987792969, 14.207122802734375, 6.78643798828125, 17.75848388671875, 4.001762390136719, 23.783004760742188, 3.2599029541015625, 7.541831970214844, 7.158233642578125, -1.4504528045654297, 3.8139991760253906, 7.037223815917969, 21.265396118164062, 10.906257629394531, 18.334671020507812, 12.38031005859375, 4.338268280029297, 7.421913146972656, 1.482290267944336, 15.221923828125, 5.444917678833008, 4.324028015136719, 2.4723968505859375, 1.07672119140625, 4.292692184448242, 3.332061767578125, -0.03365135192871094, 6.8577880859375, 8.260345458984375, -0.44620513916015625, 1.4251670837402344, 2.882425308227539, 8.919845581054688, 13.268081665039062, 2.134553909301758, 10.514450073242188, 6.21826171875, 9.826156616210938, -2.9061317443847656, 9.559921264648438, 7.3890380859375, 9.27667236328125, 3.5583953857421875, 2.4605846405029297, 19.942535400390625, 5.659290313720703, 3.89678955078125, 7.4914703369140625, 4.351707458496094, 0.7591934204101562, 12.883522033691406, 13.269935607910156, -6.981513977050781, 14.779182434082031, 22.184860229492188, 7.955556869506836, 8.313026428222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000057.npy"} +{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.606607437133789, "std": 8.165129661560059, "min": -10.152931213378906, "p10": -0.5155685424804681, "median": 5.919193267822266, "p90": 18.29417572021485, "max": 29.246353149414062, "pos_frac": 0.890625, "sample": [15.936080932617188, -2.107076644897461, 0.46447181701660156, 5.265361785888672, 3.3338470458984375, 4.318328857421875, 21.831947326660156, 2.0912857055664062, 15.412361145019531, 3.0327224731445312, 6.765773773193359, 6.7300262451171875, 8.333450317382812, 4.064970016479492, 2.7307357788085938, 12.924434661865234, 11.371528625488281, 9.950546264648438, 4.890159606933594, 3.325990676879883, 8.502288818359375, 15.701705932617188, 14.556747436523438, -1.3351974487304688, 12.424690246582031, 1.6355133056640625, 10.009265899658203, 6.796220779418945, 23.803817749023438, 4.107866287231445, 29.246353149414062, 8.890453338623047, 6.573024749755859, 0.03989410400390625, 16.563034057617188, 0.8000240325927734, 15.751693725585938, 4.458965301513672, 3.2061004638671875, -2.8338470458984375, 2.613506317138672, 3.0724658966064453, 25.879486083984375, -2.5120697021484375, 11.357452392578125, -0.7536239624023438, 1.0633659362792969, 7.3648834228515625, -6.42193603515625, 25.989349365234375, 2.0818042755126953, 6.9104766845703125, 0.8712863922119141, 15.47216796875, 11.456069946289062, -10.152931213378906, 22.6253662109375, 1.011749267578125, 2.54608154296875, 8.016685485839844, 16.96973419189453, 2.05126953125, 0.8828964233398438, 18.861793518066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000058.npy"} +{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 8.48550796508789, "std": 8.816734313964844, "min": -8.84246826171875, "p10": -1.3104663848876945, "median": 7.137125015258789, "p90": 21.697581481933593, "max": 35.818206787109375, "pos_frac": 0.875, "sample": [9.58697509765625, 10.224563598632812, 17.913421630859375, 21.7598876953125, 17.642799377441406, 4.7587738037109375, 35.818206787109375, 22.840118408203125, 7.804483413696289, 18.497177124023438, 0.8656806945800781, 7.402362823486328, -4.748832702636719, 3.0130481719970703, 24.68505859375, -0.6302986145019531, 2.6098241806030273, 15.39691162109375, 6.70794677734375, 22.909713745117188, 1.3611087799072266, 2.5678863525390625, 1.4872970581054688, 1.467803955078125, 5.21112060546875, 28.801406860351562, 2.836650848388672, 2.077066421508789, 9.104904174804688, -8.84246826171875, 10.180953979492188, -1.6348533630371094, 9.957954406738281, 8.445625305175781, 9.740737915039062, 20.21930694580078, 3.763336181640625, 13.3072509765625, 8.230941772460938, 3.9709091186523438, -6.411796569824219, -4.133380889892578, 6.181549072265625, -1.6019668579101562, 4.93293571472168, 10.628211975097656, 7.51957893371582, 3.1489486694335938, 8.611701965332031, 2.960355758666992, 1.9563789367675781, 6.87188720703125, 3.203367233276367, 11.192935943603516, 6.52415657043457, 16.47795867919922, 10.906982421875, 4.64849853515625, 4.03240966796875, -2.7223129272460938, 19.35271453857422, 21.552200317382812, 7.731746673583984, 22.194732666015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000059.npy"} +{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.686439514160156, "std": 7.756203651428223, "min": -16.633148193359375, "p10": -2.1424306869506835, "median": 7.36073112487793, "p90": 13.988619232177735, "max": 27.509864807128906, "pos_frac": 0.828125, "sample": [8.459182739257812, -5.6185302734375, 2.118865966796875, 12.975883483886719, 1.2719917297363281, 3.520679473876953, 12.87717056274414, 8.716339111328125, 0.844146728515625, -16.633148193359375, 7.390598297119141, -2.1610107421875, 2.1602249145507812, 4.073949813842773, 12.620384216308594, 7.0929107666015625, 4.881439208984375, 12.982421875, 4.451698303222656, 11.508934020996094, 13.863868713378906, 14.527297973632812, 21.2705078125, 10.1064453125, 8.825698852539062, 8.378250122070312, -0.75140380859375, 7.674217224121094, 12.1988525390625, 0.13675689697265625, 11.348682403564453, 20.10779571533203, 4.405979156494141, 6.990631103515625, 8.83755111694336, -2.1577091217041016, 11.015617370605469, 8.973281860351562, 2.4088668823242188, -6.6862945556640625, 13.083454132080078, 3.523303985595703, 6.61083984375, 9.510330200195312, 4.969154357910156, -1.0380096435546875, -12.606441497802734, 18.08880615234375, 22.737442016601562, 12.7137451171875, 14.042083740234375, 3.1203460693359375, -2.44464111328125, -1.6628646850585938, 12.031322479248047, 7.330863952636719, 27.509864807128906, 10.76983642578125, 1.1280517578125, 10.330404281616211, -2.106781005859375, 9.217582702636719, 4.739740371704102, 1.3246726989746094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000060.npy"} +{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 8.251806259155273, "std": 11.42257308959961, "min": -11.391258239746094, "p10": -1.822412872314453, "median": 5.608554840087891, "p90": 20.739484405517583, "max": 63.036376953125, "pos_frac": 0.78125, "sample": [3.1688480377197266, 1.9271221160888672, -1.3568649291992188, 0.39003753662109375, 17.361358642578125, -0.26407814025878906, -0.042781829833984375, 8.683723449707031, -1.8062362670898438, 28.88922119140625, 25.151771545410156, 22.45538330078125, 29.17474365234375, 15.149360656738281, 9.170394897460938, 10.832115173339844, -1.6504974365234375, 1.5334415435791016, 12.748458862304688, 13.293136596679688, 3.524707794189453, 5.668952941894531, 18.053604125976562, 9.573131561279297, 6.9686279296875, 7.410074234008789, 5.5479278564453125, 6.68817138671875, 10.064018249511719, 9.608482360839844, -0.5720748901367188, -1.829345703125, 13.373146057128906, 19.712005615234375, 19.25, 25.47479248046875, 19.5472412109375, 1.4660224914550781, -2.07568359375, 14.155685424804688, 3.1213951110839844, 0.32712364196777344, 14.514205932617188, 3.10235595703125, 0.9226360321044922, 21.179832458496094, 1.11639404296875, 4.1058807373046875, 63.036376953125, 14.552391052246094, 14.373237609863281, -1.9690971374511719, -11.391258239746094, 13.955497741699219, 2.56646728515625, -5.9608306884765625, 5.54815673828125, 2.9368057250976562, -8.654556274414062, 3.227874755859375, 7.893798828125, -6.966220855712891, -1.4332275390625, 1.592193603515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000061.npy"} +{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 6.19963264465332, "std": 8.31264877319336, "min": -11.350799560546875, "p10": -3.7055643081665037, "median": 5.796904563903809, "p90": 16.826065063476566, "max": 33.854042053222656, "pos_frac": 0.84375, "sample": [9.83270263671875, 1.4239959716796875, 0.7509164810180664, 12.005378723144531, 5.72535514831543, 13.66326904296875, 8.314640045166016, 0.9784393310546875, 9.863471984863281, -6.0883636474609375, 16.232620239257812, 7.252204895019531, 1.912527084350586, -0.24002647399902344, 12.035133361816406, 4.286170959472656, 10.91436767578125, 6.337982177734375, 0.3525352478027344, 5.9503173828125, 7.160442352294922, 7.886486053466797, 1.7104110717773438, 5.916797637939453, -5.050811767578125, 9.915390014648438, 4.840488433837891, 9.35223388671875, -0.36562156677246094, 33.854042053222656, 18.686790466308594, 9.993247985839844, 7.109657287597656, 2.5462799072265625, 0.8797073364257812, 6.280693054199219, 1.5647048950195312, 20.399185180664062, -11.350799560546875, -4.63397216796875, -6.906494140625, 0.8267536163330078, 1.0824165344238281, 1.0285606384277344, 1.8219451904296875, 10.283502578735352, 17.080398559570312, 10.353076934814453, -3.7665767669677734, 17.720672607421875, 8.871448516845703, 5.77290153503418, -9.207595825195312, 4.221466064453125, 14.089447021484375, 3.8624191284179688, 3.708385467529297, 3.9838638305664062, 30.323471069335938, 22.871353149414062, 7.423004150390625, 0.8753700256347656, -3.563201904296875, 5.8209075927734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000062.npy"} +{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 8.366212844848633, "std": 9.040850639343262, "min": -14.1954345703125, "p10": -0.7275726318359369, "median": 7.814731597900391, "p90": 18.971240234375006, "max": 36.428436279296875, "pos_frac": 0.875, "sample": [27.537765502929688, 9.115875244140625, 5.067602157592773, 9.643909454345703, 4.070892333984375, 1.2651290893554688, 5.513023376464844, -8.974639892578125, 7.263755798339844, 7.500679016113281, -14.1954345703125, 17.767303466796875, 9.829444885253906, 7.956733703613281, 3.1802101135253906, -0.9778499603271484, 19.487213134765625, 4.712759017944336, 14.705413818359375, 22.502487182617188, 13.700302124023438, 15.498931884765625, 7.005851745605469, 0.0053195953369140625, -0.9775848388671875, 12.47987174987793, -4.421836853027344, 0.16168594360351562, 5.7081298828125, 3.499530792236328, 14.447860717773438, 9.549690246582031, 8.027030944824219, 10.434192657470703, 11.778709411621094, 13.388504028320312, 16.17015838623047, 15.241050720214844, 5.859004974365234, 2.080841064453125, -0.1442108154296875, 14.699722290039062, 3.056680679321289, 5.950401306152344, -12.831901550292969, 14.844053268432617, 8.828369140625, 1.498748779296875, 1.3251495361328125, 7.6727294921875, 2.9940643310546875, 16.336715698242188, 20.476577758789062, 8.750221252441406, 19.6114501953125, 0.9282150268554688, 9.955923080444336, 36.428436279296875, -4.0948028564453125, 16.280109405517578, 4.247901916503906, 28.8133544921875, 4.727272033691406, 12.472885131835938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000063.npy"} +{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 9.82172966003418, "std": 11.361506462097168, "min": -20.482070922851562, "p10": -1.27231216430664, "median": 8.22147274017334, "p90": 24.11358795166016, "max": 52.93353271484375, "pos_frac": 0.796875, "sample": [0.7645416259765625, 19.930511474609375, 30.181777954101562, 8.672168731689453, 5.603782653808594, 13.191261291503906, 3.056131362915039, 18.473716735839844, -0.3956279754638672, 7.7022705078125, 2.4156036376953125, 11.650726318359375, 0.4018440246582031, 5.7241058349609375, 13.440299987792969, 34.75785827636719, 5.309268951416016, -2.1266441345214844, 20.08959197998047, 8.443361282348633, 12.202720642089844, -3.1072463989257812, 31.946914672851562, 7.999584197998047, -0.43831443786621094, 6.726280212402344, -1.8232669830322266, 7.853208541870117, -20.482070922851562, -0.48636817932128906, 8.575286865234375, -1.535797119140625, 52.93353271484375, 6.312650680541992, 1.9304695129394531, 11.83144760131836, 19.733978271484375, 23.150863647460938, 11.544097900390625, 6.286231994628906, 6.44133186340332, 24.703598022460938, 13.561958312988281, 10.363922119140625, -0.46337890625, -10.950157165527344, 26.278717041015625, 3.7201385498046875, 24.52618408203125, -0.6860504150390625, 17.362869262695312, 13.945068359375, 14.346305847167969, -1.5235671997070312, 15.089469909667969, -0.35811424255371094, 5.352779388427734, 8.651317596435547, 15.255645751953125, 13.133346557617188, 1.6118850708007812, 13.490432739257812, 19.908035278320312, 6.388214111328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000064.npy"} +{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.843679428100586, "std": 11.074901580810547, "min": -25.3094482421875, "p10": -1.0071685791015623, "median": 8.31739616394043, "p90": 24.64920959472657, "max": 34.74913024902344, "pos_frac": 0.828125, "sample": [34.74913024902344, 27.152801513671875, 21.138275146484375, 13.221473693847656, 6.00031852722168, 10.749588012695312, 0.13719940185546875, 14.221458435058594, 3.990741729736328, 22.632789611816406, 17.42005157470703, 2.0376644134521484, -0.26520729064941406, 25.434539794921875, 31.332382202148438, 1.2137985229492188, 11.307369232177734, 2.691638946533203, -4.68206787109375, 5.663490295410156, 0.7986831665039062, 19.839126586914062, 7.526771545410156, -25.3094482421875, 18.574783325195312, 9.7593994140625, -4.703521728515625, 8.334121704101562, 15.765907287597656, 5.901782989501953, 5.652992248535156, 5.652973175048828, -2.541759490966797, 5.586214065551758, 32.05439758300781, 8.300670623779297, 22.741966247558594, 17.60519027709961, 27.51471710205078, 8.60921859741211, -1.0384521484375, -14.045135498046875, 10.420120239257812, 2.467214584350586, 9.029327392578125, 20.821273803710938, 27.161354064941406, -2.74017333984375, 5.78692626953125, 22.772865295410156, 13.290115356445312, 5.4484100341796875, 22.8167724609375, -0.934173583984375, 13.12701416015625, 9.96954345703125, 6.3671722412109375, 2.3686981201171875, 17.8892822265625, 9.903656005859375, -0.32607269287109375, 8.290752410888672, -0.07962989807128906, 7.416971206665039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000065.npy"} +{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.99482536315918, "std": 11.073993682861328, "min": -11.431182861328125, "p10": -1.1466915130615234, "median": 7.3815813064575195, "p90": 20.896685409545903, "max": 52.07997131347656, "pos_frac": 0.84375, "sample": [8.553215026855469, 52.07997131347656, 0.3626251220703125, 16.919456481933594, 14.028610229492188, 13.430046081542969, 1.9204578399658203, -0.3387184143066406, 10.910287857055664, 8.04791259765625, -9.910400390625, 2.349740982055664, 7.401735305786133, -1.1923980712890625, 13.078594207763672, 4.0394134521484375, 5.131780624389648, 0.19786643981933594, 2.3186683654785156, 21.294815063476562, 8.670066833496094, 11.389934539794922, 0.5678138732910156, 11.295578002929688, 18.712947845458984, 2.1756210327148438, 5.651161193847656, -3.7758216857910156, -4.161325454711914, -11.431182861328125, 12.207710266113281, 18.538352966308594, 19.967716217041016, 8.608001708984375, 3.5211029052734375, 13.007171630859375, 6.677114486694336, -0.43767356872558594, 2.3387413024902344, 15.664749145507812, 11.594078063964844, 3.5649490356445312, -1.0400428771972656, 7.361427307128906, 6.306056976318359, -8.909004211425781, 10.333656311035156, 1.4815521240234375, -4.673492431640625, 14.993431091308594, 4.708518981933594, 22.81536865234375, 30.375961303710938, 23.669456481933594, 5.267913818359375, 19.055984497070312, 2.2236595153808594, 9.247783660888672, 17.36400604248047, 2.5066661834716797, 36.809844970703125, 1.9619255065917969, 34.15118408203125, 14.686492919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000066.npy"} +{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 9.2771635055542, "std": 11.012858390808105, "min": -14.729263305664062, "p10": -1.2801784515380858, "median": 6.914272308349609, "p90": 23.022106170654297, "max": 51.16180419921875, "pos_frac": 0.828125, "sample": [24.300491333007812, 4.411960601806641, 22.61646270751953, -0.4908599853515625, 2.355318069458008, -0.3875732421875, 18.539535522460938, 5.632440567016602, -6.267997741699219, 2.2320938110351562, 18.586200714111328, 10.629295349121094, 4.647308349609375, 17.99786376953125, -0.6185150146484375, -4.72566032409668, 10.396743774414062, 10.912399291992188, 1.8139114379882812, -14.729263305664062, 23.195953369140625, 9.555938720703125, -1.1672706604003906, 0.26160430908203125, 6.18524169921875, 1.8563385009765625, 23.58246612548828, 7.712369918823242, -1.3285675048828125, 6.88494873046875, 2.2126922607421875, 16.64992904663086, 7.822971343994141, 3.111787796020508, 2.6628494262695312, 10.153060913085938, 15.885339736938477, 6.363672256469727, 1.5104446411132812, -5.8446197509765625, 6.661674499511719, 19.481582641601562, 4.894630432128906, 3.4858264923095703, 29.2427978515625, 15.710365295410156, 6.99476432800293, 19.607215881347656, 15.071918487548828, 16.02978515625, 12.895957946777344, 6.176609039306641, 12.668359756469727, 36.57611083984375, 6.943595886230469, 1.2806682586669922, 51.16180419921875, 3.081552505493164, -2.7832412719726562, 28.268600463867188, 22.06756591796875, 8.319194793701172, -4.1589202880859375, 8.940715789794922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000067.npy"} +{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.720624923706055, "std": 9.045540809631348, "min": -8.624038696289062, "p10": -0.11763000488281167, "median": 6.5119218826293945, "p90": 22.16165618896485, "max": 40.67176818847656, "pos_frac": 0.890625, "sample": [-5.8456268310546875, 2.764007568359375, 26.5841064453125, 9.898574829101562, 5.636892318725586, -6.6906280517578125, 9.841796875, 23.76605224609375, 1.7211990356445312, 0.699371337890625, 14.328201293945312, 15.611663818359375, 19.572540283203125, 12.844200134277344, 11.587394714355469, -0.8369979858398438, 6.905612945556641, 13.538127899169922, 4.444206237792969, 9.068382263183594, -1.9643020629882812, 5.220457077026367, 5.865777969360352, 2.554229736328125, 7.991203308105469, 12.729393005371094, 19.6685791015625, 8.504302978515625, 9.565719604492188, 20.557144165039062, 22.84930419921875, 24.149005889892578, 1.8281402587890625, 6.906272888183594, 10.749977111816406, 1.9155941009521484, -0.4677734375, 6.118230819702148, 2.7761154174804688, 5.546852111816406, 18.318004608154297, 2.4803466796875, 11.436309814453125, -8.624038696289062, 7.271034240722656, 5.441596984863281, 4.204460144042969, 23.12236785888672, 2.4542083740234375, 1.7357501983642578, 2.224336624145508, 40.67176818847656, 26.9986572265625, 17.87677001953125, 14.784896850585938, 1.0015716552734375, 2.5213623046875, 4.971330642700195, 12.061622619628906, -2.35223388671875, 4.620086669921875, 2.7425880432128906, 5.1573944091796875, 8.496490478515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000068.npy"} +{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 10.385248184204102, "std": 10.408534049987793, "min": -5.082916259765625, "p10": -0.6331836700439449, "median": 7.135639190673828, "p90": 22.245738220214847, "max": 48.047393798828125, "pos_frac": 0.875, "sample": [4.5736236572265625, 6.833095550537109, 32.725555419921875, 8.990036010742188, 12.889068603515625, 17.720619201660156, -5.082916259765625, -1.4947662353515625, -0.2591590881347656, 4.604209899902344, 5.68475341796875, 9.70804214477539, -1.269775390625, 10.119209289550781, 7.025363922119141, 15.703269958496094, 28.820907592773438, 7.721931457519531, 19.170379638671875, 16.175430297851562, -1.8064842224121094, 26.03289794921875, 13.575546264648438, 10.238750457763672, 20.43523406982422, 14.82257080078125, 4.369789123535156, 16.727691650390625, 13.278003692626953, 6.000825881958008, 0.5783805847167969, 0.3966560363769531, 20.175689697265625, 48.047393798828125, 4.860103607177734, 21.160736083984375, 5.93804931640625, 16.33984375, 19.141860961914062, 1.1719436645507812, 1.6961517333984375, 17.618881225585938, 11.188339233398438, 1.5008163452148438, -0.7934799194335938, 15.393966674804688, 7.245914459228516, 39.41881561279297, 4.34588623046875, 8.497011184692383, 6.671989440917969, -5.007900238037109, 6.395927429199219, 3.4221343994140625, 2.4389801025390625, 6.779998779296875, 23.138824462890625, 22.710739135742188, 1.79193115234375, 20.502281188964844, 1.9581832885742188, 1.3828849792480469, 5.768239974975586, -1.2549896240234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000069.npy"} +{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 11.182601928710938, "std": 11.99088191986084, "min": -10.09375, "p10": -1.1931610107421875, "median": 10.888038635253906, "p90": 26.964770507812506, "max": 44.717987060546875, "pos_frac": 0.84375, "sample": [6.648899078369141, 0.6630859375, 12.391780853271484, 20.24262237548828, 24.017181396484375, 12.816164016723633, -8.8916015625, -1.1382598876953125, 14.595161437988281, 4.349206924438477, -2.099578857421875, 13.702323913574219, -2.772329330444336, 9.451278686523438, 41.57307434082031, 1.9466896057128906, 11.024017333984375, 27.748260498046875, 11.957513809204102, 44.717987060546875, 12.367874145507812, 10.752059936523438, 2.0876083374023438, 32.75682067871094, 0.8314285278320312, 18.521644592285156, 20.798812866210938, 13.082284927368164, -1.8312129974365234, 7.598356246948242, 5.8943634033203125, 28.89966583251953, 1.8936729431152344, 0.9515113830566406, 0.9533462524414062, -0.6423912048339844, 14.81949234008789, 3.0190887451171875, 13.163436889648438, -1.8620681762695312, 3.709747314453125, 4.099020004272461, -1.2166900634765625, 1.69598388671875, 11.401216506958008, 1.8226318359375, 20.880203247070312, 25.136627197265625, -10.09375, 4.900054931640625, 19.0933837890625, 3.059040069580078, 1.9823188781738281, 35.497283935546875, 13.524539947509766, -0.9472694396972656, 37.2225341796875, 11.192581176757812, 22.2510986328125, 17.649736404418945, 3.8650150299072266, 24.46776580810547, 14.557197570800781, 22.936935424804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000070.npy"} +{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.864418029785156, "std": 12.86440372467041, "min": -10.091415405273438, "p10": 0.5523014068603531, "median": 10.650421142578125, "p90": 24.473324584960945, "max": 53.0882568359375, "pos_frac": 0.890625, "sample": [-5.677177429199219, 45.21026611328125, 6.527332305908203, 2.1842498779296875, 6.698482513427734, 13.760391235351562, 6.713092803955078, 12.629180908203125, 17.69512176513672, 5.670404434204102, 22.327957153320312, 7.370109558105469, 18.722702026367188, 4.892284393310547, 3.2196884155273438, 7.453704833984375, 12.370410919189453, 16.642959594726562, -0.508331298828125, 2.050813674926758, 4.798332214355469, 46.67076110839844, 7.608085632324219, 7.421440124511719, 17.513290405273438, 17.144695281982422, 6.291477203369141, 12.821956634521484, 13.616107940673828, -9.043563842773438, 4.604957580566406, 13.751754760742188, 12.185211181640625, -0.08991813659667969, 5.2134857177734375, 18.977294921875, 19.827049255371094, -0.299774169921875, 22.832015991210938, 16.71442413330078, 5.597709655761719, 14.525505065917969, 7.493316650390625, 17.82537841796875, 17.21441650390625, 19.496185302734375, 3.8965301513671875, 39.36921691894531, 51.666534423828125, 53.0882568359375, 2.5680999755859375, 25.176742553710938, -10.091415405273438, -0.9131507873535156, 14.589157104492188, 7.071197509765625, 7.969058990478516, 10.883453369140625, 20.311065673828125, 17.537826538085938, 33.2581787109375, 3.3586349487304688, 6.500705718994141, 10.417388916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000071.npy"} +{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.383831024169922, "std": 13.651796340942383, "min": -14.252212524414062, "p10": -2.1440643310546847, "median": 10.97158432006836, "p90": 33.22061004638674, "max": 57.700927734375, "pos_frac": 0.890625, "sample": [18.94432830810547, 15.761274337768555, 6.149936676025391, 13.631874084472656, 6.954780578613281, 3.894132614135742, 9.483245849609375, 28.656173706054688, -14.252212524414062, 26.053314208984375, 15.807476043701172, 14.942501068115234, 7.968442916870117, 0.624725341796875, 17.670146942138672, 10.139629364013672, 13.491828918457031, 27.1636962890625, 15.100410461425781, 35.045013427734375, 40.474609375, -3.6176509857177734, 7.802970886230469, 10.221647262573242, 17.38910675048828, 3.5687255859375, 19.097938537597656, 10.499588012695312, 11.443580627441406, 17.01996612548828, 4.034507751464844, 5.6187744140625, 12.099884033203125, -13.684127807617188, 37.57768249511719, 28.963668823242188, 20.23328399658203, 7.220733642578125, 23.577835083007812, -4.522270202636719, 9.065711975097656, 43.00144958496094, 57.700927734375, -6.9632110595703125, 10.004932403564453, 26.299819946289062, 7.9494781494140625, 8.446086883544922, 1.3062057495117188, 37.1641845703125, -10.70208740234375, 4.482418060302734, 2.7953414916992188, 18.510894775390625, 14.592414855957031, 19.608917236328125, 10.3311767578125, 16.487403869628906, 37.080902099609375, 12.31271743774414, 6.676822662353516, 3.6534500122070312, -3.3306884765625, 1.8387336730957031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000072.npy"} +{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.05124568939209, "std": 18.70128059387207, "min": -26.7335205078125, "p10": -8.59512481689453, "median": 10.424673080444336, "p90": 33.64949836730957, "max": 67.76078796386719, "pos_frac": 0.78125, "sample": [31.06812286376953, -12.874237060546875, 7.121650695800781, 15.861061096191406, 7.351163864135742, 36.09545135498047, -6.239105224609375, 26.195053100585938, 7.50360107421875, -1.1484756469726562, 29.3570556640625, 33.393009185791016, 24.217063903808594, -1.2075042724609375, 0.8258514404296875, -15.222824096679688, 57.653778076171875, -1.9357223510742188, 10.314929962158203, 18.939170837402344, 17.98255157470703, 8.185462951660156, 10.534416198730469, 1.2009639739990234, 2.090961456298828, 10.702888488769531, -17.485050201416016, 11.099388122558594, 31.335357666015625, 15.415573120117188, 13.15240478515625, 2.9539566040039062, 57.793212890625, -11.7041015625, 5.160968780517578, 31.015647888183594, 32.12598419189453, -9.978256225585938, 4.205976486206055, 2.922565460205078, -1.9307403564453125, -5.514427185058594, -9.453536987304688, 6.962429046630859, 16.076744079589844, 16.592864990234375, 3.037233352661133, 5.639547348022461, 21.372215270996094, 43.61058044433594, 15.15081787109375, 6.468036651611328, 23.195343017578125, 6.437843322753906, 14.53070068359375, -6.5921630859375, 15.063873291015625, 55.92884826660156, 25.744094848632812, 67.76078796386719, 3.175203323364258, 33.759422302246094, 19.017593383789062, -26.7335205078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000073.npy"} +{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 13.770397186279297, "std": 20.495498657226562, "min": -40.672569274902344, "p10": -2.940638351440428, "median": 10.703973770141602, "p90": 31.936056518554697, "max": 119.9168701171875, "pos_frac": 0.8125, "sample": [27.11663818359375, 9.399299621582031, 12.487579345703125, 0.043552398681640625, 24.663726806640625, 7.354854583740234, 9.921146392822266, 55.74651336669922, 2.000995635986328, 13.633766174316406, 14.201881408691406, 10.297403335571289, 19.799091339111328, 1.387725830078125, 16.985626220703125, 119.9168701171875, 8.772201538085938, 29.798362731933594, 14.440227508544922, 2.8229293823242188, 2.157583236694336, 49.82801818847656, 3.0081710815429688, 32.852210998535156, 26.218490600585938, 10.72967529296875, -3.523681640625, 1.3299102783203125, -0.5428066253662109, -19.51378631591797, 15.958026885986328, -1.5802040100097656, -0.12471771240234375, 8.437385559082031, -40.672569274902344, 21.212562561035156, -1.55316162109375, 25.498714447021484, 5.174890518188477, -3.7998390197753906, -9.825645446777344, 11.571342468261719, 19.38848876953125, 25.784393310546875, -0.872467041015625, 44.8765869140625, 13.721504211425781, 4.0574798583984375, 8.734298706054688, 8.61346435546875, 10.678272247314453, 35.887176513671875, 19.149139404296875, 21.989105224609375, 38.39556121826172, 16.909225463867188, 5.457183837890625, 16.006637573242188, 25.427993774414062, 27.156890869140625, 0.6084938049316406, -3.7906951904296875, -3.8332443237304688, 13.328998565673828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000074.npy"} +{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 19.06302833557129, "std": 18.70346450805664, "min": -10.99371337890625, "p10": -2.3711032867431623, "median": 15.522804260253906, "p90": 46.05704116821289, "max": 70.86912536621094, "pos_frac": 0.875, "sample": [-0.4564399719238281, 22.511688232421875, 16.36800765991211, 14.022758483886719, 18.336341857910156, 13.303258895874023, 9.796661376953125, 6.406715393066406, 42.080841064453125, 61.5867919921875, 30.483306884765625, 14.601274490356445, -6.168113708496094, 27.623199462890625, 70.86912536621094, 14.66622543334961, 37.5938720703125, 59.82606506347656, 9.29452133178711, 0.021558761596679688, 42.171669006347656, 23.513290405273438, 15.16415786743164, 10.769187927246094, 18.389373779296875, 45.88910675048828, 46.12901306152344, 3.188322067260742, 8.619754791259766, 29.22915267944336, 17.949371337890625, 16.480228424072266, 5.005775451660156, 62.66014099121094, -4.7981719970703125, -3.1916732788085938, 25.195556640625, -10.99371337890625, 19.627769470214844, 18.721027374267578, 5.292552947998047, 12.161590576171875, -6.7256011962890625, 6.9317779541015625, -7.397726058959961, 27.2821044921875, 2.5830726623535156, 11.239086151123047, 35.213348388671875, 24.970367431640625, 4.449886322021484, -5.9906158447265625, 8.682634353637695, 5.773017883300781, 15.738998413085938, 4.994222640991211, 8.302780151367188, 20.35919189453125, 16.89288330078125, 17.649864196777344, 35.91869354248047, 51.386627197265625, 56.531463623046875, 15.306610107421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000075.npy"} +{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 12.300118446350098, "std": 14.21257495880127, "min": -36.66453552246094, "p10": -1.383794975280761, "median": 11.706378936767578, "p90": 27.822554016113283, "max": 63.71696472167969, "pos_frac": 0.859375, "sample": [13.112520217895508, 7.394985198974609, 2.150632858276367, 22.850723266601562, 2.3068161010742188, 17.330001831054688, 12.478790283203125, 24.298385620117188, 27.521358489990234, 12.872230529785156, 13.495002746582031, 19.686386108398438, 9.549509048461914, 30.28864288330078, 4.335807800292969, 4.928411483764648, 8.786613464355469, 63.71696472167969, 30.075538635253906, 4.994621276855469, -3.565032958984375, 11.602428436279297, 0.22000694274902344, 11.021705627441406, -13.193069458007812, 7.492761611938477, 5.117237091064453, 12.814323425292969, 5.8996429443359375, 7.618915557861328, 27.94879150390625, 17.871246337890625, 20.952316284179688, 8.970657348632812, 26.285316467285156, 4.061857223510742, -1.7114181518554688, 23.459930419921875, 3.892641067504883, -8.245742797851562, 6.947198867797852, 0.8205432891845703, -5.680910110473633, -3.6485595703125, 15.4547119140625, 26.654281616210938, 46.539947509765625, 27.527999877929688, 7.867168426513672, 4.650054931640625, 11.598464965820312, -0.2026653289794922, 14.948600769042969, 14.406776428222656, 14.726554870605469, 11.81032943725586, -0.6193408966064453, 13.128044128417969, 32.79363250732422, 20.805137634277344, 21.01800537109375, 29.22418212890625, 12.413490295410156, -36.66453552246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000076.npy"} +{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 19.283706665039062, "std": 19.269989013671875, "min": -32.8404541015625, "p10": 0.5663272857666017, "median": 19.556962966918945, "p90": 48.041534423828125, "max": 65.03570556640625, "pos_frac": 0.90625, "sample": [14.975730895996094, 42.08271026611328, 28.854778289794922, 36.850040435791016, 37.592323303222656, 0.7982177734375, 55.42301940917969, 20.306507110595703, 9.06045150756836, 9.619693756103516, 4.944143295288086, 32.40193176269531, 34.03478240966797, 5.551456451416016, 6.134368896484375, 20.80101776123047, -32.8404541015625, 19.28363800048828, 30.220489501953125, 29.683578491210938, 13.637863159179688, -2.5330772399902344, 8.448951721191406, 0.4669456481933594, 2.1930694580078125, 47.57807922363281, 2.631063461303711, 23.37220001220703, 9.830230712890625, -5.086721420288086, 29.908344268798828, 25.285202026367188, 12.756546020507812, 58.10008239746094, 32.01152801513672, 9.216190338134766, 50.851776123046875, 65.03570556640625, 18.111167907714844, 9.826217651367188, -27.659698486328125, 7.202613830566406, -10.1929931640625, 3.9591598510742188, 11.551939010620117, 25.45903778076172, 30.205467224121094, 24.559194564819336, 24.31696319580078, 25.302284240722656, 7.127635955810547, -0.8816070556640625, 4.89415168762207, 23.324661254882812, 25.179115295410156, 22.498687744140625, 3.1392974853515625, 49.98506164550781, 60.24290466308594, 9.879840850830078, 21.659683227539062, 48.24015808105469, 19.83028793334961, 6.91363525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000077.npy"} +{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 14.692001342773438, "std": 15.56376838684082, "min": -15.874916076660156, "p10": -1.396270370483398, "median": 14.677362442016602, "p90": 36.72415313720704, "max": 67.34228515625, "pos_frac": 0.84375, "sample": [6.1219940185546875, 1.1440448760986328, 34.50030517578125, 12.955177307128906, -0.5603256225585938, 40.20988464355469, 15.444683074951172, 67.34228515625, 24.401641845703125, 20.14065170288086, 19.71578598022461, 7.671985626220703, -4.4231109619140625, 14.826595306396484, 20.72952651977539, 19.851215362548828, -0.5352325439453125, 7.6136474609375, -4.9853515625, 1.1480636596679688, 6.659219741821289, 15.310773849487305, 23.28799819946289, 17.285202026367188, 17.93375015258789, 14.919116973876953, 16.22180938720703, 13.1893310546875, 18.360809326171875, 5.507299423217773, 37.67723083496094, 14.857770919799805, 5.8601837158203125, -6.691707611083984, 23.044174194335938, -6.493499755859375, 44.475128173828125, 19.527454376220703, 38.492950439453125, 2.0633392333984375, 34.47309494018555, 3.7234039306640625, 45.162742614746094, 30.001415252685547, 23.929290771484375, 1.158477783203125, 10.942123413085938, 3.3632545471191406, 12.42623519897461, 8.008514404296875, 4.947898864746094, -1.5539207458496094, 14.528129577636719, -1.0284194946289062, 26.123779296875, -15.874916076660156, 45.27430725097656, 4.534717559814453, -13.426681518554688, 25.601333618164062, 3.1877288818359375, 25.876968383789062, 1.766448974609375, 22.340438842773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000078.npy"} +{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 16.086679458618164, "std": 19.04593276977539, "min": -12.59344482421875, "p10": -4.03254280090332, "median": 12.223844528198242, "p90": 44.80573196411135, "max": 64.92160034179688, "pos_frac": 0.734375, "sample": [-2.5362548828125, 47.49634552001953, 5.276811599731445, 59.03450012207031, 7.001670837402344, -0.5777320861816406, -1.1064434051513672, -7.0970611572265625, 38.52763366699219, 34.615264892578125, 17.264278411865234, -12.59344482421875, 22.69952392578125, 58.497802734375, 10.390975952148438, 23.064498901367188, 64.92160034179688, 47.975311279296875, 16.183635711669922, -4.4481048583984375, 62.400787353515625, 16.446205139160156, -2.1428451538085938, -8.707275390625, 23.516921997070312, 1.007406234741211, 2.3822860717773438, 14.715152740478516, 12.60708999633789, 51.252830505371094, 4.6275634765625, 8.386825561523438, 11.840599060058594, 26.887435913085938, -7.03021240234375, 8.83145523071289, -2.0174026489257812, 7.405693054199219, 37.21009826660156, 33.718231201171875, 16.424076080322266, 27.777610778808594, 29.400634765625, 17.72394561767578, 24.344131469726562, -3.7071895599365234, 2.457000732421875, 24.156204223632812, 31.64733123779297, 9.835512161254883, 12.909244537353516, -4.171979904174805, -1.0238075256347656, 33.44230651855469, -1.7798843383789062, 18.089202880859375, 5.670619964599609, 5.127799987792969, -0.14351654052734375, -4.734962463378906, 8.256317138671875, 36.22607421875, -3.508495330810547, 17.199661254882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000079.npy"} +{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 14.238598823547363, "std": 18.56859016418457, "min": -16.204490661621094, "p10": -4.133474922180175, "median": 10.772218704223633, "p90": 34.709458923339845, "max": 71.16537475585938, "pos_frac": 0.765625, "sample": [26.95716094970703, 6.690452575683594, -2.4812469482421875, 22.397010803222656, 4.619140625, 2.412670135498047, -1.0446357727050781, -4.8076934814453125, 18.412002563476562, 9.915702819824219, 3.044179916381836, 14.40936279296875, -13.344783782958984, -16.204490661621094, -1.3826904296875, 20.144790649414062, 70.63186645507812, 29.267745971679688, 71.16537475585938, 41.871131896972656, 17.841079711914062, 6.2331390380859375, 35.51287841796875, 34.540077209472656, 30.500015258789062, 13.172300338745117, -0.92242431640625, 23.716705322265625, 11.25967025756836, 14.4317626953125, 1.2703857421875, -12.918907165527344, 6.307460784912109, -3.45281982421875, 21.7620849609375, -8.139076232910156, 51.91807556152344, 26.585922241210938, -6.081607818603516, 67.71099853515625, 9.033803939819336, 3.05615234375, 3.447460174560547, -0.1472930908203125, 34.78205108642578, 8.913703918457031, 20.333656311035156, 22.546890258789062, 13.782173156738281, 10.284767150878906, -0.7672004699707031, -2.8577327728271484, 0.02239227294921875, 3.3057708740234375, 21.29231071472168, 18.907730102539062, 5.1795501708984375, 4.917510986328125, 27.400596618652344, 20.375755310058594, 15.310523986816406, 29.299224853515625, -4.42518424987793, 13.354934692382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000080.npy"} +{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 18.687902450561523, "std": 20.691730499267578, "min": -16.264312744140625, "p10": -5.703773498535154, "median": 15.951116561889648, "p90": 43.301515197753915, "max": 88.83258056640625, "pos_frac": 0.875, "sample": [31.316490173339844, 23.6151123046875, -16.264312744140625, 33.62360382080078, 18.977157592773438, 41.57850646972656, 15.451717376708984, -8.434085845947266, 6.25013542175293, 27.11457061767578, 23.101730346679688, -9.297210693359375, 1.1081008911132812, 18.172523498535156, 63.25433349609375, 5.304372787475586, 16.450515747070312, 24.339752197265625, 44.039947509765625, -9.465221405029297, 0.7310943603515625, 12.10577392578125, 18.793991088867188, 54.39775085449219, 21.005752563476562, 11.062076568603516, 11.808197021484375, 5.7859344482421875, 3.890012741088867, -3.1344223022460938, 19.32270050048828, 9.556472778320312, 4.090494155883789, 10.194206237792969, 28.08527374267578, 84.96875, 33.16909408569336, 13.635040283203125, 2.6289443969726562, 44.87347412109375, 30.256561279296875, 36.527740478515625, 11.168708801269531, 2.0128746032714844, 3.7940216064453125, 0.5740776062011719, -12.17831802368164, 11.887655258178711, 21.108436584472656, 21.274368286132812, 13.792552947998047, 14.44520378112793, 18.52666473388672, -8.632564544677734, 35.16307830810547, 38.390350341796875, 88.83258056640625, 50.42301940917969, 7.00482177734375, 30.432785034179688, 4.73779296875, 21.54589080810547, -6.804924011230469, 24.534027099609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000081.npy"} +{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 15.053365707397461, "std": 21.99736785888672, "min": -39.86485290527344, "p10": -7.459218215942382, "median": 13.338205337524414, "p90": 38.668991851806645, "max": 89.83114624023438, "pos_frac": 0.8125, "sample": [-0.8921318054199219, 24.90479278564453, -9.642158508300781, 16.534549713134766, 18.590438842773438, 16.240449905395508, -16.53729248046875, 12.032608032226562, 19.794540405273438, 49.01897430419922, 51.593658447265625, 11.659292221069336, -7.926296234130859, 5.2493896484375, 11.982397079467773, 80.78681945800781, 9.693473815917969, -39.86485290527344, -6.3693695068359375, 24.810272216796875, 1.8538055419921875, 29.055191040039062, 38.934730529785156, 3.4213085174560547, -14.368026733398438, -2.95068359375, 3.65545654296875, 16.474815368652344, 16.138656616210938, 10.006103515625, 13.414283752441406, 28.52142333984375, 25.285873413085938, -0.6442661285400391, 54.90034484863281, 15.647525787353516, 38.04893493652344, 13.262126922607422, 8.479719161987305, 0.7695770263671875, -0.12590789794921875, 24.083297729492188, 45.55958557128906, -9.736717224121094, 24.84740447998047, 15.992012023925781, 6.229948043823242, 37.35728073120117, 22.730979919433594, 2.8004302978515625, 6.253288269042969, 5.970703125, -39.73681640625, 4.415290832519531, 25.248138427734375, 18.38865852355957, 6.740287780761719, 21.672622680664062, 16.347618103027344, 89.83114624023438, 11.025245666503906, 17.714859008789062, 0.5456314086914062, 37.693931579589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000082.npy"} +{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 16.844711303710938, "std": 18.570791244506836, "min": -19.188583374023438, "p10": -3.3879829406738273, "median": 14.35002326965332, "p90": 42.792594909667976, "max": 65.8651123046875, "pos_frac": 0.8125, "sample": [14.228935241699219, 16.789478302001953, 12.133663177490234, 53.36094665527344, 34.27558898925781, 10.297088623046875, -2.7680435180664062, 39.1875, 41.57659912109375, 7.757587432861328, 4.2699432373046875, 14.789369583129883, 7.485626220703125, -1.2612075805664062, 15.084007263183594, 54.727142333984375, -9.445632934570312, 65.8651123046875, 7.3061981201171875, 52.751060485839844, 29.787628173828125, 9.24020767211914, 43.31373596191406, -10.929603576660156, -19.188583374023438, -3.6536712646484375, 41.0208740234375, -11.821823120117188, 16.398191452026367, 48.54240417480469, 15.413093566894531, 11.0830078125, 3.708658218383789, 3.528839111328125, 15.507072448730469, 10.505241394042969, 23.45583724975586, -0.712493896484375, 27.057273864746094, 6.479499816894531, 15.401229858398438, 13.266246795654297, 32.224578857421875, 54.044342041015625, 17.454505920410156, 35.657623291015625, 5.788871765136719, -12.741912841796875, -1.139547348022461, -0.31975555419921875, -4.206878662109375, 30.427841186523438, 7.9567413330078125, 31.713729858398438, 18.65715789794922, 19.17778778076172, 0.08737945556640625, 6.411815643310547, 14.471111297607422, 25.682571411132812, 32.82843780517578, 12.391891479492188, 21.611732482910156, 4.067676544189453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000083.npy"} +{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 15.18610954284668, "std": 14.150245666503906, "min": -27.389175415039062, "p10": -0.18269309997558583, "median": 14.087615013122559, "p90": 32.353108215332036, "max": 60.357635498046875, "pos_frac": 0.875, "sample": [7.093101501464844, 10.415725708007812, 4.1419219970703125, 2.9713878631591797, 27.3028564453125, 4.3968658447265625, 3.88330078125, 16.922317504882812, 9.736272811889648, 11.000450134277344, 24.993335723876953, 23.58599090576172, -9.10980224609375, 14.492935180664062, 21.149600982666016, 27.72998809814453, 7.950168609619141, 60.357635498046875, 23.547061920166016, 1.4079132080078125, 6.577480316162109, 24.46075439453125, 11.95166015625, 5.520816802978516, -1.1441459655761719, 32.86268615722656, 19.045166015625, 31.164093017578125, -27.389175415039062, -0.5108070373535156, 41.734962463378906, 4.289390563964844, 29.36988067626953, 13.671913146972656, 12.935951232910156, 1.2694854736328125, 34.204627990722656, 28.292068481445312, 20.318161010742188, 19.072296142578125, 4.519866943359375, 25.123489379882812, -0.22431182861328125, 13.682294845581055, 23.724578857421875, 22.360977172851562, 0.1424846649169922, -0.3142433166503906, 23.478973388671875, 24.291412353515625, 4.7633209228515625, 36.59473419189453, 38.16612243652344, 18.3443603515625, 36.637359619140625, 15.84375, 24.07398223876953, 4.613868713378906, -4.8692169189453125, 24.735939025878906, 8.989021301269531, 15.118659973144531, 10.53285026550293, -0.08558273315429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000084.npy"} +{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 14.56472110748291, "std": 16.396625518798828, "min": -14.452140808105469, "p10": -1.9663673400878905, "median": 12.202058792114258, "p90": 38.04382858276367, "max": 54.83708190917969, "pos_frac": 0.78125, "sample": [30.60375213623047, 9.435478210449219, 23.242576599121094, 2.2814292907714844, 5.345367431640625, 18.244220733642578, 14.117645263671875, 2.4811363220214844, 45.790626525878906, 5.465232849121094, -2.461820602416992, 7.5709991455078125, 41.99211120605469, 16.209686279296875, 15.0228271484375, 15.011260986328125, -1.092529296875, -9.011070251464844, 15.010299682617188, -2.4535293579101562, -14.452140808105469, 49.571807861328125, 11.368270874023438, -1.1961174011230469, 2.389312744140625, 31.77978515625, 10.253318786621094, 1.5613861083984375, 12.471580505371094, 7.5074310302734375, 32.8780517578125, 0.77581787109375, -5.2002105712890625, -2.0629806518554688, 10.161094665527344, 4.294881820678711, 49.8975830078125, 14.61907958984375, 25.311363220214844, 1.2049312591552734, -0.8710308074951172, 54.83708190917969, -1.0660934448242188, 28.59027099609375, 36.18363952636719, 11.932537078857422, 36.89421081542969, 36.02351379394531, 5.111328125, 3.4818248748779297, 18.661109924316406, 38.536521911621094, -1.0496826171875, -1.740936279296875, 12.570884704589844, 13.553606033325195, 41.28498840332031, 12.623531341552734, 27.18138885498047, 27.469524383544922, -11.741104125976562, 15.201835632324219, -0.1963825225830078, 32.72962951660156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000085.npy"} +{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 15.120819091796875, "std": 22.13315773010254, "min": -23.043365478515625, "p10": -5.099515914916992, "median": 9.609776496887207, "p90": 40.32509613037111, "max": 90.91937255859375, "pos_frac": 0.796875, "sample": [-5.258033752441406, 11.207061767578125, 41.907318115234375, -8.560562133789062, 28.549501419067383, 16.710540771484375, 20.060571670532227, -2.612396240234375, 59.299713134765625, 5.969245910644531, 3.832111358642578, 3.6467437744140625, 3.2959365844726562, 13.168380737304688, -2.7030410766601562, 14.798652648925781, 33.773590087890625, 16.749908447265625, -0.3207969665527344, -20.642791748046875, 30.894073486328125, 10.01742935180664, 22.059925079345703, 9.202123641967773, 7.111366271972656, -11.196430206298828, 4.332481384277344, 48.000885009765625, -4.729640960693359, 3.4249725341796875, 25.82590103149414, 16.185630798339844, 4.8718719482421875, -23.043365478515625, 29.609039306640625, 36.790008544921875, 15.979362487792969, -15.525703430175781, 41.84013366699219, 4.738611221313477, 78.35061645507812, 16.390155792236328, 19.766708374023438, 7.220600128173828, -0.36234474182128906, 90.91937255859375, 5.911809921264648, 78.31182861328125, 28.94464111328125, 5.20330810546875, -19.79114532470703, 30.72002410888672, 8.438587188720703, 27.638954162597656, 17.891136169433594, 27.583209991455078, 29.74013328552246, 1.4536590576171875, 5.84747314453125, 5.687618255615234, 0.60858154296875, -3.0295257568359375, 2.9356842041015625, 12.09103012084961], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000086.npy"} +{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 16.435949325561523, "std": 19.270051956176758, "min": -30.819366455078125, "p10": -0.8358196258544919, "median": 14.930912017822266, "p90": 40.137254333496095, "max": 86.22776794433594, "pos_frac": 0.828125, "sample": [17.28917121887207, 21.421669006347656, 0.08962821960449219, 7.425071716308594, 20.080963134765625, 5.65057373046875, 15.107559204101562, -0.3676910400390625, 1.3020095825195312, 1.9622650146484375, -0.5148162841796875, 20.940048217773438, -1.797332763671875, 39.876708984375, 18.604934692382812, 27.616744995117188, 37.04975891113281, 8.826604843139648, 63.616607666015625, 30.9383544921875, 14.827224731445312, 14.466156005859375, 5.340023040771484, 26.202880859375, 18.841217041015625, 1.4536190032958984, 20.004074096679688, 86.22776794433594, 14.573379516601562, -0.4044189453125, 12.459493637084961, 40.24891662597656, 42.9310302734375, 4.775871276855469, 39.6123046875, 16.667469024658203, 11.47779655456543, 24.329654693603516, 13.720954895019531, 32.80332946777344, -0.3285636901855469, 49.71327209472656, 19.64398956298828, 18.458663940429688, -30.819366455078125, 5.162042617797852, 24.664588928222656, -23.127229690551758, 25.94318389892578, 43.599700927734375, -4.542026519775391, -3.7278404235839844, 6.152750015258789, 15.034599304199219, 0.5009613037109375, 43.522735595703125, 24.443695068359375, 23.34814453125, -19.465492248535156, 13.500640869140625, 12.6976318359375, 1.2114620208740234, 31.6090087890625, -0.9733924865722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000087.npy"} +{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 12.606681823730469, "std": 17.17668342590332, "min": -24.130783081054688, "p10": -2.415430259704589, "median": 10.607963562011719, "p90": 33.84362182617188, "max": 67.9146728515625, "pos_frac": 0.828125, "sample": [10.804435729980469, 8.144256591796875, 46.1583251953125, 34.84881591796875, 24.638282775878906, 25.98822021484375, 1.0082378387451172, 39.44792175292969, 4.683753967285156, 8.727230072021484, 20.141921997070312, 4.51161003112793, 8.776443481445312, 23.441871643066406, -12.287246704101562, 20.239425659179688, -12.188568115234375, 15.358299255371094, -21.8966064453125, -19.968017578125, -1.875253677368164, 43.462371826171875, 20.991294860839844, -2.6469345092773438, 25.39508819580078, 3.3737239837646484, 18.039215087890625, -0.8442230224609375, 15.663736343383789, 20.51251220703125, 26.882225036621094, 5.011589050292969, -0.4113597869873047, 23.581199645996094, 0.9104843139648438, 54.10205078125, 16.309471130371094, 31.4981689453125, 2.5606231689453125, 67.9146728515625, 2.58221435546875, 20.732749938964844, 4.9457244873046875, 11.569101333618164, 10.7135009765625, 11.512008666992188, 4.932655334472656, 0.9863548278808594, 19.093788146972656, 41.99755859375, 19.87103271484375, 10.502426147460938, 3.452869415283203, 3.9595184326171875, -24.130783081054688, 0.3300933837890625, 6.443878173828125, -1.4608306884765625, 5.789159774780273, 12.77060317993164, 14.982309341430664, -13.190826416015625, 9.652450561523438, 27.780776977539062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000088.npy"} +{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 12.958422660827637, "std": 14.058304786682129, "min": -24.18975830078125, "p10": -1.1230653762817375, "median": 11.840089797973633, "p90": 30.348101806640646, "max": 63.39625549316406, "pos_frac": 0.875, "sample": [1.540924072265625, 1.6628532409667969, 2.5439300537109375, 15.57675552368164, -2.9094581604003906, 24.58734130859375, 16.245445251464844, 13.348106384277344, -1.4676971435546875, 39.88214874267578, 33.02415466308594, 18.18294906616211, 13.433143615722656, 24.607650756835938, 15.181755065917969, 23.87420654296875, 10.674545288085938, 6.524559020996094, -1.415182113647461, 12.881080627441406, 8.496902465820312, 1.6050891876220703, 10.98507308959961, 7.031436920166016, -3.0071258544921875, 4.415729522705078, -0.44145965576171875, 5.918994903564453, 35.974334716796875, 10.955039978027344, 10.620674133300781, 49.396392822265625, 13.174304962158203, 32.659034729003906, 14.181533813476562, 4.4909515380859375, 15.573402404785156, 63.39625549316406, 0.9191131591796875, 1.1223316192626953, 18.195362091064453, 5.716953277587891, 12.908243179321289, 16.87960433959961, 14.69715690612793, 6.108737945556641, 9.727596282958984, 4.154590606689453, 5.347991943359375, 19.021629333496094, 12.695106506347656, 18.957626342773438, -1.5108757019042969, 8.222373962402344, 24.95592498779297, -24.18975830078125, -5.302886962890625, 15.979736328125, 13.44929313659668, 8.721565246582031, 18.9017333984375, 15.506080627441406, 51.40016174316406, 3.3479042053222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000089.npy"} +{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 15.813643455505371, "std": 20.880434036254883, "min": -21.8197021484375, "p10": -1.0104305267333982, "median": 9.538124084472656, "p90": 46.21929016113282, "max": 73.5625, "pos_frac": 0.84375, "sample": [36.44163513183594, -20.36883544921875, 26.453041076660156, 46.468048095703125, 15.69339370727539, 24.802104949951172, 0.24790382385253906, -0.5848922729492188, 34.691497802734375, 0.0051021575927734375, 26.08673095703125, 9.332504272460938, 1.6024665832519531, 9.082077026367188, 17.048057556152344, 55.811622619628906, -0.7827644348144531, 72.75946044921875, 30.257522583007812, 3.0955657958984375, 2.265735626220703, 3.2484130859375, 7.8152618408203125, -1.108001708984375, 64.20916748046875, 14.732025146484375, 20.567642211914062, 63.97174072265625, 3.664257049560547, 16.615123748779297, 73.5625, 10.281425476074219, -1.9805450439453125, 4.252241134643555, 9.743743896484375, -21.8197021484375, 15.51058578491211, 5.602367401123047, 32.010162353515625, 3.9958724975585938, 6.132289886474609, 8.557487487792969, -10.734748840332031, -0.26720428466796875, 5.3317718505859375, 37.097381591796875, 45.63885498046875, -20.332115173339844, -6.3284149169921875, 7.355224609375, 14.057838439941406, 1.9130611419677734, 13.017744064331055, 16.70989227294922, 29.958648681640625, 18.016517639160156, 8.871562957763672, 4.500574111938477, 15.5308837890625, 4.550319671630859, 10.049797058105469, 7.338996887207031, 52.608642578125, 27.21595001220703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000090.npy"} +{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 15.881075859069824, "std": 17.729496002197266, "min": -11.3492431640625, "p10": -3.8981338500976555, "median": 12.374858856201172, "p90": 38.38378677368164, "max": 65.98548889160156, "pos_frac": 0.765625, "sample": [12.214889526367188, 34.579742431640625, -10.089263916015625, 4.440864562988281, 38.50262451171875, -0.39513397216796875, -0.15494918823242188, 29.861156463623047, 4.894611358642578, 12.534828186035156, 27.384124755859375, 22.345840454101562, 25.170318603515625, 10.912826538085938, -11.3492431640625, 65.98548889160156, -5.0235748291015625, 1.9336318969726562, 3.688934326171875, 46.01011657714844, 9.873039245605469, 19.973339080810547, 52.465087890625, -3.0345458984375, 21.4420166015625, 38.10649871826172, -3.1705551147460938, 0.3072948455810547, 12.063064575195312, -1.4928665161132812, 1.7366943359375, 14.138349533081055, 35.56327819824219, -7.58050537109375, 16.170196533203125, 11.76129150390625, 31.85973358154297, -4.59619140625, 14.539440155029297, -0.5540027618408203, 7.322776794433594, 31.792327880859375, 35.96019744873047, 3.2818145751953125, -4.209953308105469, 20.9501953125, 16.77490997314453, 45.79248046875, 30.341453552246094, 20.513099670410156, 16.407196044921875, -10.104118347167969, 0.9147891998291016, 9.194747924804688, -0.5399742126464844, 31.999004364013672, 0.8948116302490234, 49.46910095214844, 26.79067611694336, 9.725616455078125, 29.698745727539062, -2.0508956909179688, 24.076629638671875, 48.374717712402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000091.npy"} +{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 14.722909927368164, "std": 17.639738082885742, "min": -21.77862548828125, "p10": -3.8795246124267573, "median": 11.375709533691406, "p90": 36.24598274230958, "max": 65.26766967773438, "pos_frac": 0.765625, "sample": [0.3945465087890625, 2.9945220947265625, 11.119316101074219, 26.443496704101562, -8.323509216308594, 3.9676513671875, -21.77862548828125, -6.84632682800293, 22.20172119140625, 13.510833740234375, 36.948909759521484, 1.3691844940185547, 23.734298706054688, 1.3235015869140625, 3.0106887817382812, 61.62109375, 33.864051818847656, 65.26766967773438, -7.23150634765625, -4.769195556640625, 14.970943450927734, -5.651214599609375, 21.215469360351562, -2.6243629455566406, -1.8459854125976562, 26.467987060546875, -3.125720977783203, 42.67535400390625, 21.217750549316406, 7.899442672729492, 48.873390197753906, -1.1311225891113281, 22.99340057373047, 2.860300064086914, 33.0517578125, 2.811840057373047, 11.632102966308594, 50.64640808105469, 7.044136047363281, 30.729034423828125, 25.996910095214844, 34.60581970214844, -0.8587799072265625, 2.645050048828125, 3.0857772827148438, 10.877506256103516, -1.6697845458984375, 19.675399780273438, 24.23155975341797, 5.077659606933594, -1.1303634643554688, -4.202583312988281, 38.17678451538086, 8.446725845336914, 12.854913711547852, 22.65093231201172, 24.663223266601562, 22.084373474121094, 27.08123016357422, 13.86102294921875, 10.855119705200195, -1.0185317993164062, 34.256614685058594, 20.486427307128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000092.npy"} +{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 14.930685043334961, "std": 15.610140800476074, "min": -17.757850646972656, "p10": -0.5473608016967771, "median": 11.557178497314453, "p90": 35.19791870117187, "max": 82.59530639648438, "pos_frac": 0.875, "sample": [18.303550720214844, 5.516513824462891, 13.937423706054688, 38.17558288574219, 39.643699645996094, 21.9937744140625, 5.1121063232421875, 9.938152313232422, 22.41558074951172, 0.10602569580078125, 38.53736877441406, 4.115516662597656, 28.61870574951172, 31.499380111694336, 82.59530639648438, 6.498912811279297, 20.43523406982422, 5.411369323730469, 8.531455993652344, 47.30077362060547, -3.975250244140625, 10.227109909057617, 15.096546173095703, -0.64794921875, 16.745864868164062, 12.326438903808594, 12.202476501464844, 6.396232604980469, -2.636140823364258, 11.594047546386719, -0.3126544952392578, 11.1275634765625, 5.999292373657227, 17.80957794189453, 23.150562286376953, 25.138107299804688, 35.15734100341797, -2.4987030029296875, -2.2795867919921875, 27.79339599609375, 21.56116485595703, 26.93730354309082, 7.691810607910156, 2.7812061309814453, 7.286724090576172, 5.158599853515625, 12.120670318603516, 2.0564002990722656, 37.14704132080078, 6.737043380737305, 2.8157577514648438, 35.215309143066406, 15.233978271484375, 6.799613952636719, 4.585784912109375, 5.116645812988281, -5.678962707519531, 29.087547302246094, 26.157665252685547, 23.68750762939453, 19.635940551757812, 11.520309448242188, -17.757850646972656, 2.5658817291259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000093.npy"} +{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 15.727283477783203, "std": 14.842207908630371, "min": -5.794677734375, "p10": -1.7529954910278307, "median": 13.65335750579834, "p90": 38.72511558532715, "max": 55.517303466796875, "pos_frac": 0.859375, "sample": [14.275054931640625, -2.273059844970703, 27.487930297851562, 9.202919006347656, 7.0263671875, -4.113471984863281, 4.134151458740234, 38.2380256652832, 4.0387725830078125, -5.794677734375, 18.964418411254883, 10.399093627929688, 14.001121520996094, 2.2972869873046875, 5.895748138427734, 0.7536869049072266, 31.852874755859375, 7.5572662353515625, -2.613903045654297, 55.517303466796875, 5.046943664550781, 7.956912994384766, 16.607498168945312, 29.93370819091797, 39.197601318359375, 15.314605712890625, 11.537811279296875, 17.00798797607422, 12.246501922607422, 0.48760223388671875, 4.402517318725586, 13.328603744506836, 20.74138832092285, 25.493560791015625, 24.97454833984375, 12.498104095458984, 21.09503173828125, 33.355201721191406, 24.58855438232422, 17.490434646606445, 2.1936588287353516, 27.220481872558594, 6.67755126953125, -2.25445556640625, 38.933868408203125, 41.91438293457031, -0.5829219818115234, 42.095916748046875, 42.0830078125, -0.182373046875, -3.5108184814453125, 54.89991760253906, 13.978111267089844, 24.840286254882812, 0.4257659912109375, 8.136260986328125, -3.85064697265625, 2.5704193115234375, 24.278770446777344, 4.4713592529296875, 21.728412628173828, 25.019943237304688, 25.6246337890625, 19.682586669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000094.npy"} +{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 12.927989959716797, "std": 12.508003234863281, "min": -21.892471313476562, "p10": 0.15100936889648472, "median": 12.399640083312988, "p90": 33.087457275390626, "max": 46.16912078857422, "pos_frac": 0.90625, "sample": [4.8342742919921875, 19.147682189941406, 14.460556030273438, 13.357398986816406, 11.72856330871582, 35.9486083984375, 8.001461029052734, 3.761608123779297, -0.9729824066162109, 15.043556213378906, 15.442827224731445, 3.0558319091796875, 15.441410064697266, 38.38035583496094, 33.263458251953125, 8.73086929321289, 18.070640563964844, 5.998106002807617, 5.851285934448242, 13.989822387695312, 5.46112060546875, 16.72924041748047, 13.88916015625, 10.04005241394043, 7.005348205566406, 0.8432769775390625, 3.947784423828125, 22.677696228027344, 12.383367538452148, 17.59780502319336, -0.8344459533691406, 3.87261962890625, 35.68865966796875, 38.53306579589844, 46.16912078857422, 29.040931701660156, 12.939674377441406, 2.3528785705566406, 2.21087646484375, 25.00921630859375, 24.024059295654297, 32.676788330078125, 38.313568115234375, 3.989288330078125, 13.711309432983398, 8.59488296508789, 0.4865684509277344, 18.322256088256836, 6.597099304199219, -21.892471313476562, -0.24858665466308594, 1.425008773803711, -4.5596923828125, 16.34918212890625, 24.23645782470703, 12.415912628173828, 19.79443359375, 9.667329788208008, 11.638790130615234, 12.908248901367188, 0.007198333740234375, 1.7812328338623047, 12.955854415893555, -4.896110534667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000095.npy"} +{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 17.10177230834961, "std": 17.06407928466797, "min": -21.061553955078125, "p10": 0.6136047363281251, "median": 15.579402923583984, "p90": 38.818279266357436, "max": 79.47856140136719, "pos_frac": 0.921875, "sample": [25.507675170898438, 18.597156524658203, 18.766311645507812, 13.56161880493164, 24.514892578125, 30.657516479492188, 11.212326049804688, 0.9471111297607422, 0.175506591796875, 25.793792724609375, 56.52677917480469, 7.33477783203125, 15.459770202636719, 2.5499534606933594, -2.434459686279297, 16.16119384765625, 19.234275817871094, 28.829025268554688, 51.900550842285156, 43.121246337890625, 9.666719436645508, 20.13086700439453, 15.69903564453125, 79.47856140136719, 35.753883361816406, 13.106914520263672, 20.813980102539062, 17.917510986328125, 13.048675537109375, 11.296524047851562, 5.599395751953125, 56.166717529296875, 19.889020919799805, 15.732078552246094, 2.8985671997070312, 40.131591796875, 23.449752807617188, 3.9746780395507812, 21.571060180664062, 2.9327144622802734, 22.86284637451172, 21.54877281188965, 8.190292358398438, 0.580078125, 7.512470245361328, -0.06471824645996094, 10.530784606933594, 19.015869140625, 0.69183349609375, 8.545997619628906, 9.504058837890625, 22.966712951660156, -5.033271789550781, -2.6547164916992188, 25.64581298828125, 1.7023773193359375, 9.371843338012695, 22.149063110351562, 58.62528991699219, 14.236328125, 1.2128677368164062, 16.47614288330078, -21.061553955078125, 4.283073425292969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000096.npy"} +{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 13.75833511352539, "std": 12.895035743713379, "min": -17.874099731445312, "p10": -0.5141550064086912, "median": 13.000584602355957, "p90": 29.66490249633789, "max": 53.748199462890625, "pos_frac": 0.859375, "sample": [9.710060119628906, 29.2288818359375, 15.99896240234375, 9.632415771484375, -0.22753524780273438, 6.137199401855469, 13.366260528564453, 16.052425384521484, -0.2671680450439453, 15.660682678222656, 10.858783721923828, 19.54505157470703, 0.16001510620117188, 12.489765167236328, 47.30116271972656, 22.842117309570312, 0.8961639404296875, 6.0096893310546875, 1.0892791748046875, 8.066247940063477, 15.095695495605469, 5.430744171142578, 9.523662567138672, 3.1602935791015625, 38.843414306640625, 15.411327362060547, 15.495277404785156, 16.13751220703125, 24.413589477539062, 10.208541870117188, 13.988014221191406, -0.6200065612792969, 7.3450164794921875, -1.3163928985595703, 18.775840759277344, 13.408111572265625, 30.06719207763672, 16.836055755615234, -17.874099731445312, -1.2127418518066406, 29.851768493652344, 24.178340911865234, -2.7813491821289062, 2.660552978515625, 28.21478271484375, 13.943599700927734, 2.5015640258789062, 13.794536590576172, 12.634908676147461, 8.733543395996094, -0.6881656646728516, 25.808120727539062, -4.034685134887695, 28.511028289794922, 11.53973388671875, 28.08526611328125, 4.058189392089844, 35.98405456542969, 23.54632568359375, 32.15476989746094, 20.902843475341797, 1.4298439025878906, 53.748199462890625, 8.088134765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000097.npy"} +{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 14.806070327758789, "std": 15.301271438598633, "min": -33.107086181640625, "p10": -3.473175048828125, "median": 13.046260833740234, "p90": 35.41066970825195, "max": 46.804412841796875, "pos_frac": 0.828125, "sample": [10.250022888183594, 7.064455032348633, 40.66325378417969, 17.2257080078125, 3.035236358642578, 12.547853469848633, 10.164047241210938, 13.798370361328125, 11.717605590820312, 3.1187973022460938, 16.921159744262695, 10.0115966796875, 13.232696533203125, 11.335319519042969, 34.71672058105469, 28.359024047851562, -6.075168609619141, 8.275115966796875, 6.09765625, -9.021469116210938, 25.340702056884766, 21.178237915039062, -4.229736328125, 1.1066131591796875, 35.539710998535156, 26.843509674072266, 21.671218872070312, 36.006752014160156, 30.397018432617188, 15.433212280273438, 14.638015747070312, 3.124134063720703, 33.17185974121094, 40.83636474609375, -33.107086181640625, 38.08454895019531, 35.10957336425781, 15.792999267578125, 18.728591918945312, 8.769599914550781, -3.483325958251953, 23.04351806640625, -4.7819671630859375, 12.859825134277344, 17.92974853515625, 3.6186771392822266, 25.297393798828125, 45.22088623046875, 31.527076721191406, 21.758548736572266, 5.780303955078125, -0.5218505859375, 25.23223876953125, 46.804412841796875, 21.549850463867188, 11.003910064697266, 6.3940277099609375, 4.154226303100586, -3.4494895935058594, -0.7055397033691406, 33.58892059326172, -0.9688034057617188, 3.482421875, -5.620391845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000098.npy"} +{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 16.02899932861328, "std": 16.38023567199707, "min": -6.7105560302734375, "p10": 1.4654903411865237, "median": 10.561936378479004, "p90": 34.031663513183595, "max": 75.30038452148438, "pos_frac": 0.9375, "sample": [2.8040637969970703, -0.146148681640625, 2.811126708984375, 12.10787582397461, 2.8706398010253906, 25.13916778564453, 7.778697967529297, 9.554145812988281, 48.448089599609375, 10.74030876159668, 8.559059143066406, 48.04338073730469, 16.516551971435547, -6.7105560302734375, 29.37933349609375, 10.362419128417969, 5.348503112792969, 21.236236572265625, 1.3019752502441406, 6.237823486328125, 23.65362548828125, 6.139865875244141, 3.940196990966797, 75.30038452148438, 2.3192520141601562, 21.931716918945312, 6.268730163574219, -2.9442214965820312, 8.600093841552734, -6.6666412353515625, 3.1980133056640625, 31.433815002441406, 19.967483520507812, 14.146934509277344, 23.592750549316406, 63.40672302246094, 34.515533447265625, 10.383563995361328, 15.137153625488281, 8.672981262207031, 1.3592147827148438, 13.580879211425781, 15.684371948242188, 26.22060775756836, 56.511573791503906, 3.1767807006835938, 10.102519989013672, 31.54784393310547, 0.30741119384765625, 3.476024627685547, 17.696182250976562, 32.90263366699219, 11.969558715820312, 6.532634735107422, 24.22882080078125, 4.663116455078125, 20.112125396728516, 10.289571762084961, 29.681365966796875, 38.87992858886719, 14.44032096862793, 23.533599853515625, 1.7134666442871094, 1.8647689819335938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000099.npy"} +{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 11.581656455993652, "std": 15.50505256652832, "min": -23.169559478759766, "p10": -2.857125663757324, "median": 8.712264060974121, "p90": 34.95568962097168, "max": 66.69366455078125, "pos_frac": 0.8125, "sample": [29.14939308166504, 19.03385353088379, -2.9991378784179688, 6.382438659667969, 9.552173614501953, 6.617439270019531, 19.364835739135742, 6.520191192626953, 8.706808090209961, 6.71234130859375, 7.290985107421875, 0.5030632019042969, 17.229934692382812, 9.468616485595703, 0.8705615997314453, 41.265899658203125, 10.477409362792969, -18.203109741210938, -0.1141510009765625, -11.286697387695312, -2.613046646118164, -1.4453163146972656, 2.503570556640625, -23.169559478759766, 34.503665924072266, 16.36373519897461, 27.89474868774414, 14.33807373046875, 0.460845947265625, 5.472326278686523, 21.600509643554688, 3.16717529296875, 24.279190063476562, -0.4066047668457031, 36.80354309082031, 16.400123596191406, 4.7257537841796875, 8.717720031738281, 25.979873657226562, 8.956344604492188, 21.182628631591797, 8.030891418457031, 8.424041748046875, 2.8177623748779297, 15.523628234863281, 19.39855194091797, 8.817832946777344, -9.518577575683594, -0.9626541137695312, 35.1494140625, 18.69426727294922, 37.077369689941406, 15.579540252685547, -18.634429931640625, 66.69366455078125, 1.1520767211914062, 8.442512512207031, 36.929779052734375, -2.96173095703125, 38.035736083984375, 13.325508117675781, 4.979747772216797, 15.67812728881836, 6.294788360595703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000100.npy"} +{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 13.298306465148926, "std": 16.0115966796875, "min": -13.437477111816406, "p10": -2.6544401168823226, "median": 11.504708290100098, "p90": 33.21231689453125, "max": 71.739501953125, "pos_frac": 0.796875, "sample": [32.69853210449219, 71.739501953125, 17.750381469726562, 15.777267456054688, 0.34046173095703125, 44.83594512939453, 49.26139831542969, 2.3334617614746094, 42.41645812988281, 51.12989807128906, -3.421173095703125, 11.400115966796875, -0.6746749877929688, 25.07335662841797, 6.761272430419922, 0.03362274169921875, 1.5172958374023438, 5.832422256469727, 19.44861602783203, 18.896392822265625, 1.1024703979492188, 10.095260620117188, 23.442710876464844, 21.192100524902344, 15.913848876953125, 11.428525924682617, 4.205787658691406, 33.43251037597656, 41.96624755859375, 10.259902954101562, 9.1107177734375, -3.3958301544189453, -6.917705535888672, 24.79449462890625, 12.85157585144043, 15.612075805664062, 25.042892456054688, 18.221710205078125, -0.924530029296875, 5.098918914794922, -0.236663818359375, -8.753665924072266, 11.580890655517578, 7.271247863769531, 6.904380798339844, -0.5351009368896484, 12.407234191894531, 15.078105926513672, 5.112945556640625, 12.202615737915039, -13.437477111816406, 24.22895050048828, 13.173538208007812, -6.6963958740234375, 16.656902313232422, 13.473901748657227, -0.3384056091308594, 19.685047149658203, 1.0326080322265625, -0.9129962921142578, 29.289276123046875, -9.167903900146484, 14.475421905517578, 2.9129180908203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000101.npy"} +{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 10.292984008789062, "std": 14.347973823547363, "min": -15.760269165039062, "p10": -6.197046661376951, "median": 10.288660049438477, "p90": 26.12673873901369, "max": 64.38186645507812, "pos_frac": 0.734375, "sample": [2.1675949096679688, 27.92315673828125, 15.278213500976562, 1.0778656005859375, 17.226165771484375, 2.87445068359375, -8.944412231445312, -15.760269165039062, 14.474687576293945, -12.362106323242188, 19.2967529296875, 32.12200927734375, 4.3315277099609375, 16.27684783935547, 12.590127944946289, -1.8763313293457031, 8.722618103027344, 17.316120147705078, 21.935096740722656, -0.4905281066894531, 9.32830810546875, -2.118246078491211, 12.638324737548828, -2.0390682220458984, 21.907379150390625, 12.941486358642578, 17.67669105529785, 64.38186645507812, -0.5782012939453125, 0.30353546142578125, 42.86677551269531, -0.07473373413085938, 21.171573638916016, 21.366052627563477, 2.2950305938720703, -3.7298126220703125, 9.618448257446289, -13.813644409179688, 3.6487274169921875, -8.175346374511719, 1.0453472137451172, -9.049057006835938, -7.105018615722656, 11.408660888671875, 20.055267333984375, 7.24560546875, 14.92919921875, 35.608428955078125, 19.454696655273438, 19.862152099609375, 31.891983032226562, -2.2348098754882812, 2.2524795532226562, 7.948158264160156, 30.79065704345703, 0.30731201171875, 18.486900329589844, 12.989234924316406, 10.958871841430664, 21.017471313476562, -3.48443603515625, -4.0784454345703125, 13.727210998535156, 20.928401947021484], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000102.npy"} +{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 17.9302921295166, "std": 15.200693130493164, "min": -8.713722229003906, "p10": 1.1360929489135747, "median": 15.167030334472656, "p90": 41.95016403198244, "max": 57.15484619140625, "pos_frac": 0.90625, "sample": [4.509670257568359, 5.749458312988281, 8.477048873901367, 9.560462951660156, 10.486953735351562, -3.767669677734375, 22.46859359741211, 19.963768005371094, 6.2135009765625, 14.144878387451172, 16.40701675415039, 16.991830825805664, 5.381782531738281, 18.01367950439453, 0.8968124389648438, 34.05841827392578, 51.5111083984375, 18.342308044433594, 23.57288360595703, 21.305625915527344, 6.3739776611328125, 11.59344482421875, 16.312515258789062, 23.30975341796875, -2.8183135986328125, 32.59423828125, 19.160663604736328, -5.328460693359375, 2.898834228515625, 15.209259033203125, 10.762443542480469, 50.92523193359375, 9.811210632324219, 21.998851776123047, 31.176536560058594, 16.599571228027344, -0.5408706665039062, -8.713722229003906, 11.636953353881836, 15.060211181640625, 1.7715606689453125, 15.124801635742188, 19.05658721923828, 13.280242919921875, 10.2037353515625, 54.093666076660156, 33.79335021972656, 18.211326599121094, 57.15484619140625, 43.78443908691406, 49.08361053466797, 32.63294982910156, 10.993728637695312, 13.507331848144531, 37.670188903808594, -1.2379016876220703, 22.013559341430664, 1.6944141387939453, 6.069915771484375, 29.74969482421875, 46.82648468017578, 13.948917388916016, 12.944290161132812, 22.8564395904541], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000103.npy"} +{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 16.904809951782227, "std": 17.527902603149414, "min": -16.956024169921875, "p10": -0.45309448242187444, "median": 12.21213150024414, "p90": 42.73717956542969, "max": 60.244659423828125, "pos_frac": 0.890625, "sample": [32.86827850341797, 5.07281494140625, 4.628778457641602, 3.2838516235351562, 27.109451293945312, 6.912315368652344, -6.484748840332031, 1.2586174011230469, 13.713653564453125, 17.695358276367188, -3.507608413696289, 10.720016479492188, 58.62034606933594, 27.295936584472656, 35.299903869628906, 1.1672592163085938, 43.280609130859375, 21.107942581176758, -3.8200149536132812, 49.171112060546875, 12.823272705078125, 13.268470764160156, 16.61417007446289, 5.3708343505859375, 7.58991813659668, 2.161905288696289, 60.244659423828125, -3.4186782836914062, 24.153045654296875, 55.11781311035156, 54.441253662109375, 14.961593627929688, 5.761177062988281, 27.068069458007812, 4.516529083251953, 3.9364051818847656, 0.5342636108398438, -3.4203948974609375, 38.47979736328125, 8.376815795898438, 29.52190399169922, 8.306076049804688, 30.470481872558594, 9.700325012207031, 47.803462982177734, 25.92057991027832, 19.28520965576172, 16.381668090820312, 8.766845703125, 22.90631103515625, -0.6943759918212891, 35.86793518066406, 6.0770111083984375, 11.187530517578125, 41.46917724609375, 34.974510192871094, 0.24753379821777344, 19.49934196472168, 0.10989570617675781, 15.21231460571289, 11.600990295410156, 2.3062095642089844, -16.956024169921875, 7.968076705932617], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000104.npy"} +{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 22.631938934326172, "std": 16.87553596496582, "min": -11.806066513061523, "p10": 3.2308113098144533, "median": 20.428775787353516, "p90": 44.92316894531251, "max": 83.43135070800781, "pos_frac": 0.984375, "sample": [18.841156005859375, 15.715576171875, 28.528160095214844, 18.2706298828125, 23.10448455810547, 19.689598083496094, 29.0499267578125, 53.79582214355469, 9.911565780639648, 2.675201416015625, 2.7847023010253906, 40.591522216796875, 28.321319580078125, 7.86311149597168, 24.6844482421875, 24.237720489501953, -11.806066513061523, 8.121910095214844, 3.355743408203125, 41.642356872558594, 17.590415954589844, 48.16423034667969, 52.10481262207031, 9.044212341308594, 10.20220947265625, 38.905738830566406, 29.94635009765625, 19.183250427246094, 3.842782974243164, 26.027671813964844, 22.110244750976562, 54.34149169921875, 7.326133728027344, 26.824989318847656, 9.032142639160156, 11.149940490722656, 27.973777770996094, 46.29473876953125, 21.167953491210938, 10.814022064208984, 29.46440887451172, 57.24153137207031, 18.031457901000977, 18.30949592590332, 6.2127685546875, 27.781509399414062, 1.4812507629394531, 33.6888427734375, 35.571834564208984, 32.30755615234375, 12.924758911132812, 14.987442016601562, 1.53662109375, 3.1772689819335938, 25.4005126953125, 24.347183227539062, 19.50482749938965, 41.72283935546875, 25.438339233398438, 83.43135070800781, 9.948932647705078, 0.33469581604003906, 7.841331481933594, 36.33116149902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000105.npy"} +{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 14.780631065368652, "std": 14.420707702636719, "min": -11.831985473632812, "p10": -2.3717979431152334, "median": 14.406625747680664, "p90": 34.00704612731934, "max": 66.73580169677734, "pos_frac": 0.828125, "sample": [13.291328430175781, 1.7923412322998047, 2.3025341033935547, 1.380044937133789, 14.854162216186523, -1.155752182006836, 25.911788940429688, -2.8395614624023438, 18.546951293945312, 32.99382400512695, 23.047542572021484, 18.79132080078125, 4.069145202636719, 38.9862060546875, 10.747222900390625, 32.65647888183594, -0.44408416748046875, 9.797447204589844, 25.838043212890625, -0.20041847229003906, 35.50135803222656, 14.30881118774414, 3.8092193603515625, 34.4412841796875, 10.435953140258789, 17.55170440673828, 9.111351013183594, -6.6809539794921875, 18.50536346435547, -4.9837188720703125, 18.76694679260254, 13.510540008544922, 17.062274932861328, 22.37569808959961, 3.1444969177246094, 7.956169128417969, 13.472610473632812, -1.2803497314453125, 34.95074462890625, 38.39080047607422, 14.504440307617188, 15.667957305908203, -11.831985473632812, 0.8845596313476562, 66.73580169677734, 13.118309020996094, 17.869102478027344, 25.923805236816406, 4.76971435546875, 4.1929168701171875, 3.1536331176757812, 20.319198608398438, 44.884849548339844, 21.741788864135742, 25.8665771484375, 20.07220458984375, -3.0828781127929688, 16.070419311523438, 5.850555419921875, 19.646011352539062, 32.51692199707031, -3.6258621215820312, -3.2850799560546875, 23.280555725097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000106.npy"} +{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 16.76299285888672, "std": 15.433250427246094, "min": -16.472091674804688, "p10": -1.0375350952148417, "median": 12.995878219604492, "p90": 38.77900199890138, "max": 55.06269836425781, "pos_frac": 0.890625, "sample": [27.363006591796875, 42.24884033203125, 21.733200073242188, 34.28498840332031, 12.23681640625, 12.79037094116211, 23.380401611328125, 1.0135345458984375, -16.472091674804688, 2.3941574096679688, 7.666009902954102, 33.76130676269531, -5.1421051025390625, 21.19406509399414, 6.958396911621094, 7.844413757324219, 7.674236297607422, 42.720436096191406, 3.523214340209961, -4.2885284423828125, 35.654823303222656, 30.023414611816406, 36.47918701171875, 2.027162551879883, -2.746063232421875, 3.2206153869628906, 8.370010375976562, 2.8293914794921875, 55.06269836425781, 25.928977966308594, 18.94915199279785, 13.201385498046875, 30.254112243652344, 15.18682861328125, 26.67047882080078, 46.27821350097656, 9.188159942626953, 12.094032287597656, 7.95982551574707, -8.155509948730469, 25.923446655273438, -8.874343872070312, 27.39563751220703, 39.7646369934082, 10.01910400390625, 40.30463409423828, 6.1192779541015625, 47.9573974609375, 27.112350463867188, 25.94676971435547, 1.136566162109375, 6.994424819946289, 16.7132568359375, -1.91656494140625, 11.066967010498047, 5.37432861328125, 6.781972885131836, 30.265228271484375, 15.995010375976562, 19.626712799072266, 23.193695068359375, 24.89276123046875, 6.9338531494140625, 12.742874145507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000107.npy"} +{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 17.171451568603516, "std": 19.241872787475586, "min": -12.206710815429688, "p10": -2.9826507568359375, "median": 12.531774520874023, "p90": 47.814797973632814, "max": 62.72894287109375, "pos_frac": 0.78125, "sample": [19.371261596679688, 46.10026550292969, 20.122268676757812, -8.135040283203125, -9.7625732421875, 31.926361083984375, 16.902660369873047, 2.9941787719726562, 62.591888427734375, -2.7616043090820312, 8.842910766601562, 49.268531799316406, 2.2182388305664062, -0.5796356201171875, 1.5184669494628906, 22.089439392089844, -0.25026512145996094, 34.03733825683594, 52.93180847167969, 50.20030212402344, 51.22834777832031, 28.627777099609375, -1.6878700256347656, -7.18864631652832, 17.32206153869629, 6.523109436035156, 12.40292739868164, 9.786285400390625, 46.64715576171875, 7.2580413818359375, 6.177297592163086, -1.0799026489257812, 38.74525451660156, -3.0773849487304688, 62.72894287109375, 3.47711181640625, 32.325775146484375, -3.153209686279297, -0.0245513916015625, 3.7015609741210938, 17.23843002319336, 6.656982421875, 18.298389434814453, 48.315216064453125, 14.363212585449219, 1.7998638153076172, 33.755409240722656, 27.679580688476562, 9.665340423583984, -12.206710815429688, 3.454864501953125, 29.684083938598633, 40.21929931640625, 23.98577880859375, 3.2472763061523438, -1.6530590057373047, 25.9730224609375, 9.180757522583008, 35.817604064941406, -11.316875457763672, 17.465957641601562, 27.226932525634766, 12.660621643066406, 7.094024658203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000108.npy"} +{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 18.880970001220703, "std": 15.617107391357422, "min": -8.151317596435547, "p10": 1.3010925292968751, "median": 17.8643159866333, "p90": 37.82953033447266, "max": 59.952484130859375, "pos_frac": 0.90625, "sample": [24.149967193603516, 3.0792198181152344, 36.652801513671875, 30.550247192382812, 3.4107437133789062, 27.34780502319336, 1.3858795166015625, 11.391559600830078, 8.487373352050781, 25.87554168701172, 45.141845703125, 13.653133392333984, 27.683197021484375, 15.08209228515625, 30.156448364257812, 5.785343170166016, 15.70926284790039, 22.6801815032959, -1.9064140319824219, -2.3064041137695312, 1.2647552490234375, 59.952484130859375, -1.2209625244140625, -2.46441650390625, 18.40797996520996, 2.3456039428710938, 49.55741882324219, 17.98674964904785, 6.422788619995117, -8.151317596435547, 19.00157928466797, 28.88990020751953, 2.005685806274414, 17.74188232421875, 27.4512939453125, 18.66950035095215, 32.80653381347656, 37.96685028076172, 33.98902130126953, 44.06871795654297, 8.269424438476562, 5.50653076171875, 3.214813232421875, 12.835872650146484, 2.46875, 31.808799743652344, 26.909576416015625, 20.78204345703125, 12.07988166809082, 36.1240234375, -3.3714599609375, 16.62677001953125, 19.214496612548828, 42.32545471191406, 47.32215881347656, 7.459495544433594, 37.509117126464844, 13.128837585449219, 35.92096710205078, 37.195899963378906, 4.8481903076171875, 1.890472412109375, 6.656711578369141, 30.95336151123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000109.npy"} +{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 17.782527923583984, "std": 18.577707290649414, "min": -23.8824462890625, "p10": 0.677023506164551, "median": 15.044184684753418, "p90": 45.19076461791994, "max": 75.23970031738281, "pos_frac": 0.921875, "sample": [29.693496704101562, 14.7794189453125, 18.403682708740234, 32.628570556640625, 24.36237335205078, 16.35778045654297, 18.70187759399414, 14.914789199829102, -23.526107788085938, 15.173580169677734, 21.744529724121094, 8.757179260253906, 2.1788787841796875, 8.957860946655273, 13.004692077636719, 0.8891792297363281, 13.2977294921875, 75.23970031738281, 46.80913543701172, 12.741813659667969, 2.352712631225586, 0.4381675720214844, 65.16487121582031, 47.083892822265625, 35.157386779785156, 55.9725341796875, 3.6762313842773438, 14.7689208984375, 4.621753692626953, 30.137802124023438, -5.757926940917969, -5.865413665771484, 3.750194549560547, 9.583755493164062, 4.178371429443359, 55.8890380859375, 21.45433807373047, 41.41456604003906, 29.751632690429688, 0.9749717712402344, 27.063636779785156, 1.859696388244629, 7.730461120605469, 24.654048919677734, 10.350528717041016, 21.91187286376953, 33.653297424316406, -23.8824462890625, 20.63768768310547, -4.2315216064453125, 11.522483825683594, 1.0471248626708984, 17.625892639160156, 12.02490234375, 27.1016845703125, 16.21619415283203, 15.214271545410156, 21.742752075195312, 8.15781021118164, 29.282264709472656, 19.910385131835938, 0.5860996246337891, 9.008552551269531, 49.03617858886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000110.npy"} +{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 18.308712005615234, "std": 18.513505935668945, "min": -27.56195068359375, "p10": -1.869922256469726, "median": 14.973943710327148, "p90": 42.6937454223633, "max": 81.57937622070312, "pos_frac": 0.875, "sample": [81.57937622070312, -5.58349609375, -27.56195068359375, 27.944610595703125, -2.2121353149414062, 19.309242248535156, 1.5364151000976562, 16.41156005859375, 6.355857849121094, 46.14433288574219, 15.226665496826172, 34.840606689453125, 17.31610870361328, 17.234283447265625, 12.289947509765625, 24.618072509765625, 44.826751708984375, 14.721221923828125, 36.72552490234375, 6.660951614379883, 21.169063568115234, -5.375587463378906, 1.6315498352050781, 3.805694580078125, 27.125625610351562, 37.79911804199219, 11.396167755126953, 38.02099609375, -4.5058135986328125, 9.628341674804688, 8.0665283203125, 12.144815444946289, 44.46588134765625, 9.916606903076172, 54.321868896484375, 14.534584045410156, 11.569557189941406, 21.453826904296875, 23.248260498046875, 52.548118591308594, 12.276374816894531, 0.4803619384765625, 10.093420028686523, 8.156177520751953, 53.38032531738281, 13.603347778320312, 19.297210693359375, 13.945281982421875, 36.618896484375, 1.830587387084961, 38.55876159667969, 18.82400894165039, 34.60466766357422, 37.77738952636719, 18.429122924804688, 3.535724639892578, -11.345108032226562, 18.347339630126953, 9.58256721496582, 21.59703826904297, -1.3960609436035156, 33.42932891845703, -2.0730056762695312, 0.8545455932617188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000111.npy"} +{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 13.259477615356445, "std": 15.364657402038574, "min": -21.741172790527344, "p10": -3.7061618804931635, "median": 10.741082191467285, "p90": 37.50633850097657, "max": 53.36078643798828, "pos_frac": 0.84375, "sample": [9.764759063720703, 46.902374267578125, 25.727676391601562, -2.792003631591797, -9.92013168334961, 11.717405319213867, 12.937736511230469, 11.9197998046875, 3.0902862548828125, -4.684051513671875, 43.767974853515625, 36.67628479003906, 1.3437385559082031, 3.8990402221679688, 37.95904541015625, 33.9885368347168, 17.07935333251953, 25.91358184814453, 20.181644439697266, 17.772850036621094, 24.869094848632812, 28.70608139038086, 29.46137237548828, 7.603813171386719, 15.111869812011719, 18.77992057800293, 14.719879150390625, 2.405475616455078, -7.210670471191406, 11.848747253417969, 1.8064098358154297, -3.3990440368652344, 11.721443176269531, 39.796669006347656, 53.36078643798828, -6.584877014160156, 20.99903106689453, 8.379207611083984, 25.781570434570312, 4.471385955810547, -21.741172790527344, 37.86207580566406, 5.487209320068359, 3.8497467041015625, 6.422813415527344, 3.967723846435547, 22.43376922607422, 2.5390357971191406, 15.434669494628906, 38.3857421875, 3.3682479858398438, 1.9244422912597656, 23.135385513305664, -3.8377838134765625, 2.9487171173095703, -4.568565368652344, 3.2585792541503906, 0.7556686401367188, 26.81310272216797, 0.7981662750244141, 9.681451797485352, 5.152345657348633, -1.2239990234375, 19.885101318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000112.npy"} +{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 14.937784194946289, "std": 13.543709754943848, "min": -7.895599365234375, "p10": -2.069530296325683, "median": 13.42149543762207, "p90": 34.40156402587892, "max": 43.011505126953125, "pos_frac": 0.859375, "sample": [-7.895599365234375, 27.30297088623047, -1.5990962982177734, 29.920654296875, 7.6568756103515625, 8.673973083496094, 16.79857063293457, 24.768943786621094, 11.938127517700195, 6.031898498535156, 3.6355819702148438, 23.87701416015625, 20.585891723632812, 40.165985107421875, 12.679227828979492, -4.684965133666992, 8.977581024169922, -4.636344909667969, 9.862037658691406, 42.709136962890625, -5.8679656982421875, 17.09160614013672, 27.508079528808594, 1.4591827392578125, 22.696510314941406, 13.388004302978516, 7.761007308959961, 36.361244201660156, 14.400062561035156, -3.3894424438476562, 6.595344543457031, 40.31024932861328, 14.958904266357422, -1.0915908813476562, 13.454986572265625, 27.099510192871094, 6.8952178955078125, 28.85790252685547, 30.140594482421875, 7.5136260986328125, 21.959259033203125, 31.40612030029297, 1.0389118194580078, 2.850362777709961, -4.90399169921875, 27.997920989990234, 15.705284118652344, 15.9180908203125, -2.2711448669433594, 2.5880470275878906, 43.011505126953125, 3.4605369567871094, 16.744796752929688, 29.380292892456055, 35.685325622558594, 27.649959564208984, 20.42652702331543, 36.54020690917969, 0.5989246368408203, 8.691726684570312, 6.470703125, 2.218547821044922, 8.62735366821289, 21.311416625976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000113.npy"} +{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 20.203205108642578, "std": 20.695791244506836, "min": -8.657783508300781, "p10": -0.3929000854492183, "median": 18.24544334411621, "p90": 40.8492790222168, "max": 96.96954345703125, "pos_frac": 0.890625, "sample": [57.538299560546875, 45.64160919189453, 29.70200538635254, 38.75945281982422, 8.195175170898438, 53.70361328125, 4.5404205322265625, 19.56237030029297, 7.69989013671875, 19.939117431640625, -7.385564804077148, 18.781768798828125, 29.625587463378906, 1.6256179809570312, -2.2399139404296875, 31.063156127929688, 5.624523162841797, -0.5807685852050781, 17.709117889404297, 35.67584228515625, 11.049076080322266, 8.73251724243164, -4.73724365234375, 40.09630584716797, 29.16158676147461, 31.973854064941406, 12.501518249511719, 11.006359100341797, 5.30108642578125, 27.794063568115234, 28.59941864013672, 1.6344757080078125, -0.8755645751953125, 29.134841918945312, -2.193807601928711, 5.05113410949707, 8.949384689331055, 19.934783935546875, 34.81175231933594, 27.211212158203125, 25.256732940673828, 29.22875213623047, 3.1364212036132812, 17.579313278198242, 12.35329818725586, 2.3945388793945312, 82.09562683105469, 2.4825973510742188, 20.379287719726562, 66.8600845336914, 3.8777809143066406, 29.000898361206055, 0.17650604248046875, 0.045459747314453125, 96.96954345703125, 36.9603385925293, 2.4127464294433594, 18.869781494140625, 27.737991333007812, -8.657783508300781, 7.310340881347656, 41.17198181152344, 4.016613006591797, 31.028213500976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000114.npy"} +{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 15.270513534545898, "std": 18.626760482788086, "min": -28.11772918701172, "p10": -1.5947526931762683, "median": 11.810723304748535, "p90": 44.182558441162115, "max": 80.57516479492188, "pos_frac": 0.84375, "sample": [5.185342788696289, -2.193899154663086, 45.97257614135742, 16.539276123046875, 8.067718505859375, 1.3582839965820312, 2.267333984375, 5.750560760498047, 0.6764926910400391, 14.672019958496094, 6.705661773681641, -2.8873062133789062, 22.07503890991211, 18.732566833496094, 23.163835525512695, 54.46513366699219, 13.386232376098633, 3.4476699829101562, 80.57516479492188, -4.48194694519043, 22.55359649658203, 1.0098724365234375, 46.24891662597656, 25.441112518310547, 43.18328857421875, 3.1939849853515625, 18.1983642578125, 26.747974395751953, 1.6853694915771484, 26.668502807617188, 50.64814758300781, 28.609588623046875, 11.320655822753906, -28.11772918701172, 5.4793243408203125, 18.795455932617188, 21.071212768554688, 8.422500610351562, 12.300790786743164, 21.21538543701172, 14.416000366210938, 13.987960815429688, 20.386245727539062, 9.164505004882812, 54.085784912109375, -0.089202880859375, 7.4886322021484375, 5.9810943603515625, 0.5095977783203125, 6.2611236572265625, 8.944036483764648, 30.32122802734375, -0.45839691162109375, 21.30129623413086, -5.443401336669922, 44.610816955566406, 28.1640625, -2.0817623138427734, 30.348220825195312, -23.7469482421875, 28.6058349609375, -0.23140716552734375, 4.394174575805664, 2.2392578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000115.npy"} +{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 15.100841522216797, "std": 13.967205047607422, "min": -6.018527984619141, "p10": 0.050526809692383434, "median": 13.340904235839844, "p90": 35.974467468261736, "max": 73.95278930664062, "pos_frac": 0.890625, "sample": [5.676214218139648, 6.251064300537109, -6.018527984619141, -0.20771026611328125, 14.536825180053711, 14.437187194824219, 12.530441284179688, 9.759511947631836, 21.780248641967773, 22.31513214111328, 8.965263366699219, 1.9864253997802734, 15.783355712890625, 14.189064025878906, 1.209686279296875, -2.952423095703125, 17.59131622314453, 8.093299865722656, 25.451881408691406, 37.572113037109375, 11.593605041503906, 17.872848510742188, 7.397491455078125, 1.8927383422851562, 19.238677978515625, 10.587509155273438, 23.32489013671875, 37.7017822265625, 5.144233703613281, 6.6038055419921875, 43.82014465332031, 20.11621856689453, 10.382802963256836, 14.1513671875, -0.6609516143798828, -3.2892684936523438, 10.914497375488281, 15.34149169921875, 11.591726303100586, 38.449066162109375, 1.2332134246826172, 32.24662780761719, 9.071174621582031, 22.906631469726562, 19.095306396484375, 5.086585998535156, 30.196083068847656, 8.262252807617188, -2.3177242279052734, 24.126754760742188, 19.45962905883789, -3.9874916076660156, 73.95278930664062, 18.633556365966797, 16.14718246459961, 17.445697784423828, 12.098320007324219, 14.347335815429688, 1.6884632110595703, 7.93310546875, 0.6530799865722656, 28.158477783203125, 38.60652160644531, 40.28522491455078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000116.npy"} +{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 14.997328758239746, "std": 15.858565330505371, "min": -13.7943115234375, "p10": -0.38811874389648404, "median": 12.972740173339844, "p90": 32.7546989440918, "max": 68.41575622558594, "pos_frac": 0.875, "sample": [8.30661392211914, 15.071149826049805, 15.525833129882812, 9.851924896240234, 9.814170837402344, 4.699462890625, 13.339231491088867, 3.0282135009765625, 7.74481201171875, 14.988653182983398, 22.883190155029297, 32.84397888183594, 31.21820068359375, 68.41575622558594, 25.267967224121094, 8.887657165527344, 0.0006561279296875, 29.663925170898438, 27.667221069335938, -13.7943115234375, -5.2970733642578125, 4.421684265136719, 13.133987426757812, 21.514537811279297, 24.96084976196289, 3.4510955810546875, 11.54754638671875, 1.1403579711914062, 16.504989624023438, 6.547626495361328, 17.181791305541992, 43.66077423095703, -0.03972625732421875, 31.169845581054688, 37.748023986816406, 8.482269287109375, 66.80368041992188, 16.261428833007812, 4.6029815673828125, 16.270343780517578, 1.7730293273925781, 2.428234100341797, 3.722352981567383, -2.6559600830078125, -8.680747985839844, 6.855152130126953, 32.54637908935547, 15.519638061523438, 37.635719299316406, 15.857294082641602, -5.212799072265625, 2.605152130126953, 3.948169708251953, 24.78579330444336, 46.561248779296875, 24.736572265625, 5.908605575561523, 20.836929321289062, 12.811492919921875, 17.94170379638672, 12.17626953125, -0.5374298095703125, -4.9058685302734375, 17.680770874023438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000117.npy"} +{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 13.979833602905273, "std": 12.897310256958008, "min": -7.94024658203125, "p10": 0.25027084350585965, "median": 12.852465629577637, "p90": 28.41146125793457, "max": 53.79058837890625, "pos_frac": 0.90625, "sample": [-1.343170166015625, 8.64689826965332, 10.464056015014648, -6.062957763671875, 11.681676864624023, 14.970149993896484, 24.065513610839844, 22.28004264831543, 25.496854782104492, 28.39868927001953, 25.157974243164062, 8.977386474609375, 40.87542724609375, 10.548057556152344, 2.4372406005859375, 15.538808822631836, 5.607349395751953, 6.2277984619140625, 15.162738800048828, 53.79058837890625, 23.250534057617188, -0.9755229949951172, 0.5365753173828125, 4.4899139404296875, -7.94024658203125, 7.716344833374023, 3.4729347229003906, 9.873025894165039, 0.5966453552246094, 3.8109588623046875, 8.359931945800781, 18.121665954589844, -6.473419189453125, 28.416934967041016, 8.1749267578125, 15.806625366210938, 9.382637023925781, 14.452545166015625, 41.49578857421875, 3.8924179077148438, 4.443153381347656, -2.507549285888672, 14.160614013671875, 14.02325439453125, 40.96595764160156, 27.824424743652344, 44.0472412109375, 22.689773559570312, 30.798622131347656, 9.03089714050293, 1.7185478210449219, 21.86396026611328, 22.399017333984375, 17.142913818359375, 18.567657470703125, 15.353843688964844, 14.936513900756836, 7.059318542480469, 1.3912544250488281, 18.361045837402344, 15.302215576171875, 25.100868225097656, 0.13556671142578125, 0.517913818359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000118.npy"} +{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 14.818709373474121, "std": 16.255353927612305, "min": -11.993217468261719, "p10": -4.438190460205078, "median": 12.3642578125, "p90": 35.478094482421874, "max": 72.39674377441406, "pos_frac": 0.84375, "sample": [14.991790771484375, 9.237899780273438, 4.838104248046875, 47.9593505859375, 29.0115966796875, 3.6142578125, 8.64773178100586, 19.116798400878906, -2.384014129638672, 25.51844024658203, 7.190559387207031, 3.5510940551757812, 53.09906005859375, 25.977645874023438, 0.2644081115722656, 39.58892822265625, 12.987255096435547, 7.470680236816406, 72.39674377441406, 23.883712768554688, 18.974136352539062, 26.86359405517578, 25.71691131591797, 5.908237457275391, 12.83984375, 17.056564331054688, -11.5667724609375, 3.8379287719726562, 24.977096557617188, 24.557525634765625, 7.1673431396484375, -4.087394714355469, 35.06044006347656, 10.915790557861328, 35.65708923339844, -9.156883239746094, 20.29950714111328, 13.859054565429688, 2.368257522583008, 5.481048583984375, 13.650588989257812, 7.686637878417969, -4.588531494140625, 36.484336853027344, 10.52044677734375, 18.375946044921875, 10.265029907226562, -8.465560913085938, 31.163475036621094, 23.51260757446289, -9.629257202148438, -9.35833740234375, 11.606529235839844, 21.783203125, 22.0162353515625, 16.203779220581055, 19.746986389160156, 5.204174041748047, 45.5733528137207, -0.04185676574707031, -11.993217468261719, 1.6483001708984375, 11.888671875, 11.452468872070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000119.npy"} +{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 18.60680389404297, "std": 17.107524871826172, "min": -12.079910278320312, "p10": -1.066276168823242, "median": 17.023963928222656, "p90": 43.022140502929695, "max": 60.889739990234375, "pos_frac": 0.875, "sample": [56.59379577636719, 26.372711181640625, 19.702072143554688, 43.530487060546875, 12.89767837524414, 10.285863876342773, -1.02099609375, 41.73074722290039, 12.835441589355469, 20.169410705566406, 28.49066162109375, 55.17486572265625, 23.18999481201172, 18.552810668945312, 3.7882423400878906, 14.7431640625, 31.477981567382812, 10.963310241699219, 22.74517059326172, -4.163825988769531, -10.529975891113281, 53.31547927856445, 21.048019409179688, 23.925495147705078, 19.286060333251953, 4.1808624267578125, 12.4178466796875, 8.026466369628906, 41.83599853515625, -12.079910278320312, -4.1842041015625, 29.546600341796875, 21.273887634277344, 1.9191093444824219, 40.15351867675781, 1.9482784271240234, 56.386444091796875, 24.10666275024414, 23.146804809570312, 0.22762107849121094, -1.0856819152832031, 4.721706390380859, 26.67822265625, 33.62432861328125, 17.193557739257812, 16.8543701171875, 44.46913146972656, 60.889739990234375, 13.230636596679688, 17.608234405517578, 0.6151046752929688, 23.23064422607422, -1.91455078125, 16.118194580078125, 16.14234161376953, 5.0640716552734375, 7.729911804199219, 28.19989013671875, 15.196334838867188, 2.7728652954101562, 27.83234405517578, -4.546104431152344, 12.9696044921875, 3.2298355102539062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000120.npy"} +{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 19.640243530273438, "std": 18.757091522216797, "min": -6.694976806640625, "p10": -1.167340850830078, "median": 15.933277130126953, "p90": 46.09449844360353, "max": 68.93438720703125, "pos_frac": 0.875, "sample": [2.8923702239990234, 9.991462707519531, 1.0039520263671875, 56.16680908203125, -2.429248809814453, 3.8791275024414062, 27.431320190429688, -1.3301620483398438, 44.126853942871094, 27.841690063476562, 9.162132263183594, 23.651229858398438, 49.87205505371094, 43.46564483642578, 18.747596740722656, -5.748649597167969, 17.239662170410156, 30.02779769897461, 24.79637908935547, 0.7359790802001953, 10.681783676147461, 43.539249420166016, -2.4996414184570312, 27.06927490234375, 46.937774658203125, 18.906967163085938, 13.365890502929688, 0.6294746398925781, 38.936431884765625, 4.01268196105957, 14.62689208984375, 3.7208633422851562, 26.85662078857422, 18.568328857421875, 6.32465934753418, 42.80253601074219, 1.8496475219726562, 52.63299560546875, 14.597991943359375, 68.93438720703125, 0.90576171875, 40.556053161621094, -0.9606552124023438, 11.39959716796875, 8.1170654296875, 9.651435852050781, 37.29691696166992, 5.288856506347656, 13.381568908691406, 18.047470092773438, 6.681316375732422, 2.0669937133789062, 17.74365234375, 33.76263427734375, 47.85157775878906, 39.785888671875, -6.694976806640625, 21.06768798828125, -1.25592041015625, 29.61072540283203, 64.88694763183594, 6.509529113769531, -3.511798858642578, 20.768386840820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000121.npy"} +{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 24.55230712890625, "std": 18.17508316040039, "min": -8.086063385009766, "p10": 3.2047359466552736, "median": 21.91520881652832, "p90": 48.14146423339844, "max": 71.13600158691406, "pos_frac": 0.9375, "sample": [34.23554992675781, 3.4979095458984375, 13.026756286621094, 34.33427429199219, 10.734800338745117, 17.398590087890625, 38.56182861328125, 31.859886169433594, 18.53940200805664, 38.68653869628906, 7.842874526977539, 36.79210662841797, 32.412925720214844, 59.738525390625, 41.74085235595703, 22.076496124267578, 7.2319183349609375, 34.89275360107422, 48.4869384765625, 29.8126220703125, 1.9067230224609375, 47.335357666015625, 5.253932952880859, 5.612266540527344, 16.399246215820312, 19.36756134033203, 15.879035949707031, 41.167449951171875, 27.121665954589844, 25.96851348876953, 33.4615478515625, -5.545175552368164, 58.983055114746094, 2.4793834686279297, 21.636520385742188, 52.31475830078125, 24.099273681640625, 32.392616271972656, 32.10906219482422, 46.48365783691406, 7.423131942749023, 17.46185302734375, -8.086063385009766, 62.56268310546875, 21.185623168945312, 18.60052490234375, 20.26526641845703, 6.694103240966797, 30.7193603515625, -5.7531890869140625, -0.44530487060546875, 11.025421142578125, 5.255655288696289, 42.608978271484375, 71.13600158691406, 4.511287689208984, 40.71983337402344, 21.753921508789062, 3.079090118408203, 3.508525848388672, 48.7806396484375, 33.36979675292969, 13.21359634399414, 35.42689514160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000122.npy"} +{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 17.00613021850586, "std": 19.7613582611084, "min": -29.73029327392578, "p10": -2.6705297470092764, "median": 15.122533798217773, "p90": 40.807359313964845, "max": 75.99698638916016, "pos_frac": 0.875, "sample": [26.45958709716797, 24.409500122070312, 15.730567932128906, 3.9194869995117188, 13.23223876953125, 16.452377319335938, 34.5545654296875, 26.7449951171875, 28.143577575683594, 19.67363739013672, 1.2033119201660156, 11.919578552246094, 8.824615478515625, 0.6778030395507812, 53.42677307128906, 21.323074340820312, 11.252971649169922, 3.6215438842773438, 0.19303131103515625, 17.16413116455078, 3.6249542236328125, 53.617393493652344, 23.147140502929688, -23.253433227539062, 10.389392852783203, 26.35662841796875, 75.99698638916016, 4.484046936035156, 39.628997802734375, 44.91233825683594, -3.0034046173095703, 39.017906188964844, 15.128963470458984, 4.327461242675781, 33.92628479003906, 39.66156005859375, 15.116104125976562, 28.620346069335938, 58.44622802734375, -5.743125915527344, 21.63214111328125, 20.9703369140625, -10.748371124267578, 0.2645111083984375, 29.850059509277344, 6.556182861328125, -13.324501037597656, 2.0394458770751953, -6.471855163574219, 64.85545349121094, -1.8938217163085938, 17.422378540039062, 11.544105529785156, 26.625694274902344, 7.164405822753906, 14.313589096069336, 41.29841613769531, 15.942583084106445, -29.73029327392578, 16.761375427246094, 13.970436096191406, 1.7940559387207031, 0.9062976837158203, 13.319562911987305], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000123.npy"} +{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 21.367599487304688, "std": 23.805362701416016, "min": -17.739791870117188, "p10": -3.344985961914062, "median": 18.257415771484375, "p90": 47.03081359863282, "max": 111.84188842773438, "pos_frac": 0.828125, "sample": [42.20631408691406, -1.5382766723632812, 22.97238540649414, 18.24425506591797, -0.7596454620361328, 51.88459777832031, -17.739791870117188, -0.8712368011474609, 38.903533935546875, 47.6470947265625, 2.9952011108398438, 17.162399291992188, 2.5855865478515625, 18.27057647705078, 18.598480224609375, 90.60543823242188, 36.12818145751953, 8.95254898071289, 12.793380737304688, 27.56768798828125, -5.0108184814453125, -7.123390197753906, 36.35295104980469, 9.423114776611328, 65.07046508789062, 40.7799072265625, 25.792266845703125, 7.420631408691406, 27.950950622558594, 32.23438262939453, -3.5297622680664062, 19.10616111755371, 8.826873779296875, 26.168737411499023, 1.7731857299804688, 81.52078247070312, 28.306568145751953, 47.530792236328125, 9.790214538574219, 41.51263427734375, 34.381568908691406, -2.9138412475585938, 19.129650115966797, -4.23321533203125, 20.006494522094727, 32.973907470703125, -10.66583251953125, 6.8385467529296875, 9.689504623413086, 9.391357421875, 22.207813262939453, 18.853805541992188, -8.630905151367188, 42.121490478515625, 12.558258056640625, 111.84188842773438, 6.084930419921875, 45.86419677734375, 2.6063785552978516, 14.720039367675781, 17.98236846923828, 18.367515563964844, 8.802200317382812, 9.04279899597168], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000124.npy"} +{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 19.78438949584961, "std": 17.413667678833008, "min": -8.573371887207031, "p10": 2.801374435424805, "median": 16.266315460205078, "p90": 47.07278709411624, "max": 67.39120483398438, "pos_frac": 0.9375, "sample": [3.3886375427246094, 18.590194702148438, 15.944869995117188, 1.3178939819335938, 9.248931884765625, 7.580709457397461, 37.60414123535156, -0.8243293762207031, 50.589508056640625, 11.082023620605469, 0.13245773315429688, -6.8089447021484375, 8.961671829223633, 11.981330871582031, 30.17736053466797, 24.2601318359375, 20.9459228515625, 16.58776092529297, 5.08251953125, 10.13980484008789, 49.4296989440918, 9.953149795532227, 10.6654052734375, 16.79052734375, 34.18962097167969, 67.39120483398438, -8.573371887207031, 10.472591400146484, 17.690345764160156, 4.2638092041015625, 11.151451110839844, -8.433158874511719, 10.559577941894531, 3.696775436401367, 17.383739471435547, 61.409393310546875, 4.165805816650391, 54.498802185058594, 36.15602111816406, 5.557880401611328, 6.134864807128906, 41.573326110839844, 36.109375, 36.285560607910156, 49.94380187988281, 21.925704956054688, 32.698631286621094, 9.941072463989258, 39.082305908203125, 25.456321716308594, 11.076995849609375, 31.980297088623047, 12.1964111328125, 7.169374465942383, 23.75092315673828, 17.7181396484375, 25.313461303710938, 22.59130859375, 10.355268478393555, 18.06805419921875, 57.408111572265625, 35.255615234375, 7.214332580566406, 2.5496902465820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000125.npy"} +{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 19.074655532836914, "std": 17.02735710144043, "min": -13.571517944335938, "p10": 0.8240037918090828, "median": 17.302658081054688, "p90": 41.606278991699234, "max": 69.21311950683594, "pos_frac": 0.90625, "sample": [37.51531982421875, 23.695823669433594, 5.277423858642578, 27.539779663085938, 6.719573974609375, 12.013954162597656, -3.0527725219726562, 1.5326061248779297, 13.279571533203125, 62.48492431640625, 19.0299072265625, 12.905872344970703, 34.25006103515625, 20.839111328125, 22.336143493652344, -0.1916046142578125, 36.8443603515625, 13.674705505371094, 32.25060272216797, 19.00194549560547, 24.236385345458984, 17.65444564819336, 1.6117191314697266, 8.647930145263672, 18.71637725830078, 44.87945556640625, 1.9217071533203125, 42.90625, 19.36441421508789, 17.775920867919922, 11.002124786376953, 5.900428771972656, 9.19281005859375, 7.200767517089844, 38.57301330566406, 19.532791137695312, 30.35523223876953, -4.7061309814453125, 25.911361694335938, 16.950870513916016, -0.9510040283203125, 6.56121826171875, 57.919342041015625, 12.023393630981445, 13.587274551391602, 5.157833099365234, 8.582279205322266, 6.234992980957031, 30.482337951660156, 44.95063781738281, -13.571517944335938, 69.21311950683594, 20.951568603515625, 12.086441040039062, 8.512405395507812, 25.834304809570312, 18.720779418945312, 28.368576049804688, 10.396602630615234, -5.3748779296875, 0.5203170776367188, 35.45446014404297, 10.281166076660156, 59.2611083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000126.npy"} +{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 16.056873321533203, "std": 14.14534854888916, "min": -8.831474304199219, "p10": -1.7270090103149411, "median": 15.299047470092773, "p90": 37.327684020996095, "max": 49.2900390625, "pos_frac": 0.859375, "sample": [16.423660278320312, 29.278724670410156, 0.23836898803710938, 15.320930480957031, -0.33263397216796875, -4.306343078613281, 40.07428741455078, 7.43670654296875, 38.31462860107422, 8.747650146484375, 5.41485595703125, 19.43762969970703, 30.03857421875, -3.7366714477539062, 12.651744842529297, 6.857349395751953, 16.04125213623047, 1.3340301513671875, -5.157440185546875, 33.643341064453125, 10.868614196777344, 36.37457275390625, 28.18244171142578, 15.815948486328125, 29.70968246459961, 43.66114044189453, 20.896381378173828, 23.40777587890625, 8.403129577636719, 15.664745330810547, 41.012229919433594, 15.078483581542969, 29.570556640625, 10.520336151123047, 6.7598419189453125, 25.242332458496094, -1.81494140625, 4.773681640625, 15.277164459228516, -1.5218334197998047, 5.846736907958984, 37.73616027832031, 5.679058074951172, 15.163490295410156, 20.768600463867188, 22.12140655517578, 21.971435546875, 33.06840515136719, 24.49602508544922, 20.876983642578125, -6.4668731689453125, 14.202919006347656, 6.613210678100586, 43.42048645019531, -8.831474304199219, 9.122200012207031, 1.1606521606445312, 49.2900390625, 20.574249267578125, 4.3011016845703125, 17.29647445678711, 23.94442367553711, -3.73956298828125, 3.42071533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000127.npy"} +{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 16.93927001953125, "std": 17.68482208251953, "min": -11.791961669921875, "p10": -1.4636808395385739, "median": 11.172279357910156, "p90": 44.822631454467775, "max": 73.08222961425781, "pos_frac": 0.859375, "sample": [13.191211700439453, 9.90194320678711, 39.548057556152344, -10.636039733886719, 43.8994140625, -3.5574913024902344, 45.21829605102539, 39.42198944091797, 8.616996765136719, 11.184623718261719, 46.570037841796875, 10.376312255859375, 22.026771545410156, 9.218502044677734, 1.6379013061523438, 31.526580810546875, 24.100692749023438, 3.7650146484375, 5.575553894042969, 27.200786590576172, -7.291168212890625, 0.06180572509765625, 73.08222961425781, -11.791961669921875, 47.99957275390625, 9.9425048828125, 32.23724365234375, 42.30828094482422, 11.159934997558594, 14.681028366088867, -1.6480903625488281, 0.7930908203125, 26.977705001831055, 8.723419189453125, 1.5594863891601562, 9.381134033203125, 8.328826904296875, 22.72745132446289, 8.447433471679688, 26.038715362548828, 16.39471435546875, -1.652811050415039, 31.71685791015625, -4.802865982055664, 2.468568801879883, 51.36283874511719, 1.9582958221435547, 31.913192749023438, 13.787864685058594, 18.541603088378906, 9.273818969726562, 9.163528442382812, 14.813873291015625, -1.0333919525146484, 7.417442321777344, 13.157180786132812, 9.494720458984375, 22.509033203125, 18.983352661132812, -0.781097412109375, 19.70758056640625, 48.5096435546875, 48.23308563232422, 0.4703636169433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000128.npy"} +{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 17.114233016967773, "std": 20.050907135009766, "min": -32.344268798828125, "p10": -3.533080863952636, "median": 14.297990798950195, "p90": 41.12254257202149, "max": 80.16046142578125, "pos_frac": 0.8125, "sample": [-2.4986400604248047, 12.493734359741211, -3.826265335083008, 40.99700164794922, 2.3004913330078125, 20.597259521484375, 3.8612213134765625, 0.424957275390625, 39.80818176269531, 17.945316314697266, 22.66339111328125, 29.94354248046875, 8.57071304321289, 20.160781860351562, 18.40694808959961, 24.813201904296875, 58.1282958984375, 2.420654296875, 6.166961669921875, 29.744680404663086, 47.969482421875, 31.17443084716797, 35.82398986816406, -15.976036071777344, 18.09044647216797, -8.508514404296875, 3.9539260864257812, 9.632244110107422, 80.16046142578125, 47.33600616455078, 11.702682495117188, 20.836807250976562, -2.8489837646484375, 30.77739715576172, -3.9326858520507812, 3.302165985107422, 7.908683776855469, 37.312408447265625, 8.914619445800781, 0.09697723388671875, -6.320915222167969, 10.500150680541992, 13.474990844726562, 35.055477142333984, 54.99156188964844, -32.344268798828125, 9.254053115844727, 4.871984481811523, -0.682373046875, -1.4969062805175781, 33.049339294433594, 51.80754089355469, 22.620872497558594, 18.51996612548828, 28.103126525878906, 4.258995056152344, 32.355567932128906, 15.120990753173828, 1.9589157104492188, 22.898284912109375, 34.51396179199219, -12.777015686035156, 41.17634582519531, -2.4487075805664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000129.npy"} +{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 22.913349151611328, "std": 20.445497512817383, "min": -24.745529174804688, "p10": -1.164003562927246, "median": 20.502984046936035, "p90": 49.86514434814453, "max": 63.07379913330078, "pos_frac": 0.875, "sample": [29.366539001464844, 23.928112030029297, 3.8792037963867188, 48.81367492675781, 6.154655456542969, 12.646224975585938, 41.36921691894531, 41.07319641113281, 4.964935302734375, 1.962646484375, 16.977964401245117, 12.214794158935547, 19.51488494873047, 34.83164978027344, 63.07379913330078, 6.380317687988281, 49.93391418457031, -1.2131290435791016, 18.40494155883789, 37.18635559082031, 7.156459808349609, 11.51953125, -1.689453125, 29.192733764648438, 35.49224853515625, 13.653312683105469, 24.712806701660156, 51.91169738769531, 23.602935791015625, 55.001007080078125, 46.58654022216797, 48.959869384765625, -1.5593605041503906, 7.3712921142578125, 34.03782653808594, -24.745529174804688, 12.053873062133789, 12.15843391418457, 14.081672668457031, 2.2280120849609375, 24.853912353515625, 20.852428436279297, 36.170066833496094, 58.8460693359375, 54.08715057373047, -6.533323287963867, 62.45941162109375, -7.702522277832031, -1.04937744140625, 18.636581420898438, 28.06100845336914, 11.40829849243164, 49.655784606933594, 6.873863220214844, 35.88789367675781, -18.781837463378906, 41.00047302246094, 3.0073795318603516, 21.582473754882812, 46.519317626953125, 49.704681396484375, 30.856590270996094, 20.153539657592773, 6.7147064208984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000130.npy"} +{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 20.071718215942383, "std": 17.16399574279785, "min": -14.5615234375, "p10": 2.0814064025878922, "median": 17.705707550048828, "p90": 45.24000930786133, "max": 65.72723388671875, "pos_frac": 0.9375, "sample": [20.265316009521484, 15.916746139526367, 20.460702896118164, 16.149974822998047, 51.08819580078125, -14.5615234375, 5.886669158935547, 13.602874755859375, 22.685531616210938, 29.730728149414062, 48.333282470703125, 10.893867492675781, 10.730361938476562, 4.761320114135742, 17.574783325195312, 3.96563720703125, 9.071056365966797, -2.3183155059814453, 39.1544189453125, 18.391632080078125, 20.851776123046875, 41.03276062011719, 11.066783905029297, 1.1534805297851562, 17.836631774902344, 3.4603271484375, 45.27325439453125, 5.94611930847168, 34.6700439453125, 23.287065505981445, 13.440488815307617, -12.204208374023438, 45.162437438964844, 1.4904403686523438, 65.72723388671875, 0.7744102478027344, 17.407455444335938, 5.953327178955078, 48.012481689453125, 15.732614517211914, 23.92828369140625, 19.28866958618164, 61.241546630859375, 9.694644927978516, -7.805107116699219, 26.1768798828125, 7.0194549560546875, 27.46283721923828, 3.653169631958008, 11.74282455444336, 4.330684661865234, 30.779266357421875, 9.120361328125, 49.11585998535156, 27.735549926757812, 37.401397705078125, 25.28839874267578, 24.677772521972656, 8.221590042114258, 24.681625366210938, 19.875633239746094, 40.038978576660156, 43.001129150390625, 10.06036376953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000131.npy"} +{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 19.139083862304688, "std": 17.331829071044922, "min": -5.791587829589844, "p10": 1.309801483154297, "median": 15.264236450195312, "p90": 43.261308288574234, "max": 73.47293090820312, "pos_frac": 0.921875, "sample": [9.561504364013672, 4.311084747314453, 48.27348327636719, -5.791587829589844, 17.93781280517578, 27.664024353027344, 21.380847930908203, 71.065185546875, 61.889068603515625, 5.682643890380859, 20.95184326171875, 23.694984436035156, 1.119913101196289, 9.073272705078125, -0.0846710205078125, 11.958438873291016, 39.943603515625, 12.324390411376953, 16.161651611328125, 4.557992935180664, 5.933811187744141, 3.0295791625976562, 29.50843048095703, 37.170310974121094, 1.3752822875976562, 13.6270751953125, 5.760957717895508, 31.21918487548828, 1.28173828125, 9.503303527832031, 13.924308776855469, 2.8864593505859375, 6.315860748291016, 24.19707489013672, 32.33559799194336, 30.045997619628906, 17.53569793701172, 12.303211212158203, 26.225040435791016, 25.44662094116211, 16.9116268157959, 20.504226684570312, 26.400272369384766, 35.302955627441406, -4.286506652832031, 13.438518524169922, 46.133277893066406, -4.345466613769531, 5.882717132568359, 32.206520080566406, -1.3132953643798828, 28.100936889648438, 44.68318176269531, 4.0767974853515625, 13.047088623046875, 6.9593658447265625, 19.548583984375, 23.065994262695312, 14.3668212890625, 9.787796020507812, 47.278228759765625, 2.9474143981933594, 19.43030548095703, 73.47293090820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000132.npy"} +{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 17.867952346801758, "std": 19.537412643432617, "min": -13.41998291015625, "p10": -0.38848419189453115, "median": 13.57284927368164, "p90": 45.056110382080085, "max": 73.07952880859375, "pos_frac": 0.875, "sample": [19.526992797851562, 41.11528015136719, 14.015823364257812, 26.774612426757812, 5.365390777587891, -4.2296295166015625, 7.66552734375, 5.6786956787109375, -11.410308837890625, 53.34089660644531, 16.993896484375, 25.28357696533203, 1.272979736328125, 45.65874481201172, -13.066299438476562, 0.9813461303710938, 20.544464111328125, -0.43719482421875, 60.083740234375, 10.686002731323242, 37.53512191772461, 11.343536376953125, 3.9093170166015625, 33.60771179199219, 1.8182754516601562, 35.98521423339844, 14.656394958496094, 23.798099517822266, 33.606712341308594, 16.765396118164062, 13.129875183105469, 4.718051910400391, 11.221418380737305, 63.82679748535156, 3.39892578125, 23.10993194580078, 22.957809448242188, 0.9936370849609375, 55.71124267578125, 22.813922882080078, 33.045047760009766, 14.501899719238281, 5.833681106567383, 73.07952880859375, -13.41998291015625, 1.2357254028320312, 21.639144897460938, 47.266937255859375, -0.2748260498046875, 5.929676055908203, 9.717544555664062, 32.19117736816406, -10.828323364257812, 43.64996337890625, 27.369178771972656, -3.1688079833984375, 43.53074645996094, 12.948684692382812, 22.58885955810547, 7.746040344238281, 3.0234222412109375, 0.14175796508789062, 3.874032974243164, 1.1759490966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000133.npy"} +{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 19.3120174407959, "std": 15.649565696716309, "min": -2.265575408935547, "p10": 3.743128395080567, "median": 12.93411636352539, "p90": 42.590901184082036, "max": 61.168853759765625, "pos_frac": 0.984375, "sample": [3.50262451171875, 4.396240234375, 12.793724060058594, 43.18390655517578, 60.981475830078125, 8.696495056152344, 18.810733795166016, 7.046581268310547, 6.410602569580078, 13.681751251220703, 22.953903198242188, 9.611648559570312, 5.468992233276367, 7.6067962646484375, 20.354347229003906, 14.531524658203125, 4.304304122924805, 27.947052001953125, 23.641677856445312, 6.192859649658203, 7.686985015869141, 1.7719879150390625, 11.772514343261719, 21.581180572509766, 56.92341613769531, 26.80487823486328, 41.118896484375, 44.405941009521484, 27.233123779296875, 10.721317291259766, 3.3946380615234375, 36.15673828125, 7.790691375732422, 12.417705535888672, 33.952117919921875, 41.20722198486328, 52.62232208251953, 5.165363311767578, 22.545513153076172, 7.466609954833984, 7.97650146484375, 13.074508666992188, 9.925346374511719, 7.656904220581055, 61.168853759765625, 18.44516372680664, 28.683151245117188, 9.545646667480469, 28.226333618164062, 2.9520626068115234, 12.618431091308594, 3.4773101806640625, 1.0598678588867188, 46.38372802734375, 33.94384765625, 20.79448699951172, 26.448684692382812, 7.019184112548828, 12.283912658691406, 24.218650817871094, -2.265575408935547, 18.805850982666016, 11.926738739013672, 36.74310302734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000134.npy"} +{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 20.7138614654541, "std": 19.68378257751465, "min": -16.650279998779297, "p10": 1.4211303710937504, "median": 16.666990280151367, "p90": 49.81827011108399, "max": 70.25750732421875, "pos_frac": 0.9375, "sample": [23.132003784179688, 27.828262329101562, 1.0589637756347656, 48.211029052734375, 22.381134033203125, 33.36766815185547, 66.48635864257812, 19.31067657470703, 55.568206787109375, 61.91825866699219, 49.28528594970703, 6.974830627441406, 50.04669189453125, 41.188987731933594, 59.07037353515625, 15.44803237915039, 12.641494750976562, 35.1588134765625, 9.172870635986328, 3.1340160369873047, 6.493253707885742, 43.189857482910156, 36.315025329589844, 25.58636474609375, 12.946527481079102, 36.43757629394531, 10.793655395507812, 70.25750732421875, -10.799947738647461, 33.401336669921875, 4.1341094970703125, 54.28875732421875, 46.98667907714844, -16.650279998779297, 11.75863265991211, 6.760753631591797, 12.230178833007812, 19.02142333984375, 18.646041870117188, 4.12872314453125, -8.798187255859375, 3.9366531372070312, 18.794416427612305, 6.431724548339844, 18.896427154541016, 13.702262878417969, 16.374622344970703, 0.07662200927734375, 17.604995727539062, 1.8292999267578125, 6.466184616088867, 14.752281188964844, 23.798812866210938, 10.250600814819336, 1.2462005615234375, 26.5679931640625, 17.472610473632812, 33.9678955078125, 9.206443786621094, -7.19049072265625, 7.0327301025390625, 4.77593994140625, 4.2216644287109375, 16.95935821533203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000135.npy"} +{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 16.596004486083984, "std": 19.312175750732422, "min": -6.646942138671875, "p10": -2.215103912353514, "median": 12.25448226928711, "p90": 34.04188499450684, "max": 113.54296875, "pos_frac": 0.875, "sample": [20.797786712646484, 18.44683074951172, 26.700225830078125, 17.07642364501953, 4.440177917480469, -3.414287567138672, 49.45396423339844, 33.90541458129883, -6.35546875, 14.487079620361328, 24.154495239257812, 22.46271514892578, 32.44667053222656, 2.912567138671875, 27.280532836914062, 12.515716552734375, 19.006942749023438, 8.451257705688477, 35.7366943359375, 71.3614501953125, -0.7653350830078125, 34.100372314453125, 113.54296875, 6.1587677001953125, 17.54216766357422, 10.221122741699219, 37.41162109375, 9.348011016845703, 53.99676513671875, 0.09586524963378906, 18.32769012451172, 5.402229309082031, 22.348024368286133, 9.295120239257812, 9.87127685546875, 6.8871612548828125, 10.442352294921875, 1.8848190307617188, 20.072219848632812, 14.917984008789062, 12.974002838134766, -6.646942138671875, -3.679168701171875, 29.846389770507812, -3.259187698364258, 3.054311752319336, 7.039089202880859, 21.593843460083008, 31.893474578857422, 25.531768798828125, -2.8364334106445312, 8.940101623535156, 13.664352416992188, 10.307182312011719, 9.332763671875, 5.4732666015625, 6.602508544921875, 4.217418670654297, -4.434501647949219, 0.5456180572509766, 13.69748306274414, 31.833267211914062, 11.993247985839844, 1.491973876953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000136.npy"} +{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 22.422649383544922, "std": 22.97294044494629, "min": -26.417160034179688, "p10": -2.011648368835448, "median": 19.237220764160156, "p90": 55.12747726440432, "max": 94.377197265625, "pos_frac": 0.859375, "sample": [22.266815185546875, 15.119026184082031, 10.911819458007812, 28.429458618164062, 42.715782165527344, 6.459144592285156, 6.027900695800781, 20.157394409179688, 1.3596382141113281, 46.466880798339844, 7.1747283935546875, 21.990982055664062, 94.377197265625, 6.047054290771484, 17.322853088378906, -5.400627136230469, 24.870208740234375, 14.919807434082031, 12.999889373779297, 50.01258850097656, 57.31957244873047, 31.007713317871094, 2.9586620330810547, 64.1632080078125, 23.535873413085938, 41.25636291503906, 47.101806640625, 20.217575073242188, 2.6119136810302734, 60.009361267089844, -9.03814697265625, 22.4384765625, 19.34821319580078, 9.335060119628906, 43.680145263671875, 0.8161201477050781, 49.000701904296875, 29.408071517944336, -2.4619979858398438, 17.685562133789062, 22.105972290039062, 11.819908142089844, 23.559814453125, -26.417160034179688, -2.5679397583007812, -5.5531768798828125, 33.62591552734375, 4.462299346923828, 19.12622833251953, 74.9710693359375, 14.563751220703125, 27.288375854492188, 5.321691513061523, 39.38702392578125, -0.9608325958251953, -0.15444374084472656, 8.254318237304688, 4.066967010498047, 5.558492660522461, 32.52594757080078, 65.06916809082031, -3.1615447998046875, 41.19105529785156, 66.34384155273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000137.npy"} +{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 18.525365829467773, "std": 18.633071899414062, "min": -33.681884765625, "p10": -0.11109848022460778, "median": 16.008777618408203, "p90": 41.25808258056641, "max": 69.5491943359375, "pos_frac": 0.890625, "sample": [39.491180419921875, -4.4503326416015625, 35.484859466552734, 43.410736083984375, 19.295684814453125, 12.923103332519531, 4.2929840087890625, -0.7952766418457031, 2.4225311279296875, 24.4642333984375, 16.65163803100586, 16.181121826171875, 31.103790283203125, 2.23101806640625, 15.113897323608398, -33.681884765625, 9.839311599731445, 1.779541015625, 2.5341835021972656, 15.836433410644531, 38.96942901611328, 47.657569885253906, 3.1077938079833984, 2.89764404296875, 57.82499313354492, 32.185302734375, 4.201072692871094, 6.212413787841797, 26.406410217285156, 40.249237060546875, 23.34906005859375, 5.333707809448242, 21.935138702392578, 9.911163330078125, 46.318077087402344, 41.69044494628906, 8.27703857421875, 26.657054901123047, -7.794441223144531, 10.713264465332031, 24.359451293945312, 14.974353790283203, 13.653594970703125, 1.4853172302246094, 4.743003845214844, -2.0604324340820312, 34.400787353515625, 39.30464172363281, 11.96444320678711, 25.289642333984375, 33.432952880859375, 14.920160293579102, 23.8414306640625, -19.450096130371094, -9.662622451782227, 27.510486602783203, 11.384078979492188, 24.384201049804688, 6.5426483154296875, 69.5491943359375, 25.32115936279297, 21.007705688476562, 33.49738311767578, 54.99884033203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000138.npy"} +{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 20.38736343383789, "std": 18.344341278076172, "min": -6.552803039550781, "p10": -0.2380430221557616, "median": 16.26479148864746, "p90": 44.9964469909668, "max": 72.0260009765625, "pos_frac": 0.875, "sample": [-3.5448455810546875, 28.99005889892578, 28.669097900390625, 36.99430847167969, -0.7320632934570312, 28.961395263671875, 44.12449645996094, 3.175830841064453, 16.341968536376953, 35.82795715332031, -5.1295013427734375, 23.069679260253906, 4.272224426269531, 62.145263671875, 45.370140075683594, 27.053937911987305, 36.84132385253906, 17.529518127441406, 15.057891845703125, 12.124549865722656, 52.441184997558594, 19.419300079345703, -0.8468856811523438, 25.43292236328125, 16.18761444091797, 3.5305919647216797, 40.74041748046875, 30.824310302734375, 3.8651657104492188, 24.548934936523438, 39.48115539550781, -0.2877063751220703, 14.422782897949219, 21.99353790283203, 17.061111450195312, 17.22711181640625, 11.68511962890625, 0.2705707550048828, 0.6375465393066406, 5.414207458496094, 9.393997192382812, 0.3672065734863281, 6.699394226074219, 36.059967041015625, 14.54940414428711, 57.707611083984375, 31.73448944091797, -6.552803039550781, 8.366584777832031, 3.0002593994140625, 32.52429962158203, -1.079132080078125, 15.50025749206543, 14.323341369628906, 72.0260009765625, 11.891643524169922, 64.08726501464844, 11.793914794921875, 33.02308654785156, -0.122161865234375, 48.260833740234375, 15.495231628417969, 7.111686706542969, 17.436668395996094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000139.npy"} +{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 19.300575256347656, "std": 19.64964485168457, "min": -16.71148681640625, "p10": -3.006729698181151, "median": 15.208828926086426, "p90": 44.81263732910156, "max": 68.39668273925781, "pos_frac": 0.859375, "sample": [3.270284652709961, 37.2176399230957, 66.89739990234375, -5.511497497558594, -1.2245025634765625, 30.784042358398438, 37.85101318359375, 49.72243881225586, 24.052682876586914, 42.5933837890625, 37.76409912109375, 16.69546127319336, 30.595230102539062, 6.244192123413086, 22.753189086914062, 15.945215225219727, 51.67810821533203, 3.411914825439453, -1.8921661376953125, 23.59178924560547, 6.523139953613281, 23.360572814941406, 22.74365234375, 33.26471710205078, 1.6998672485351562, 5.775360107421875, 45.18928527832031, 9.68841552734375, 6.2513885498046875, 38.69191360473633, 9.492351531982422, 6.553825378417969, 7.8579559326171875, 37.706787109375, 2.7270660400390625, 14.472442626953125, 37.31025314331055, 12.028274536132812, 5.053466796875, 24.700729370117188, 30.965248107910156, -5.442340850830078, 43.93379211425781, -16.71148681640625, -4.836967468261719, 22.09079360961914, 12.095504760742188, 28.36199951171875, 54.960304260253906, 3.5244979858398438, -3.4843997955322266, 32.3988037109375, 0.4922466278076172, 29.113800048828125, -14.639900207519531, 10.961437225341797, -5.8647918701171875, 0.3572235107421875, 68.39668273925781, 53.57978820800781, 3.7608718872070312, 11.364421844482422, 3.0160064697265625, 33.31182861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000140.npy"} +{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 19.22583770751953, "std": 17.37042808532715, "min": -18.364532470703125, "p10": -1.4518745422363255, "median": 18.982088088989258, "p90": 45.20720291137695, "max": 55.35432434082031, "pos_frac": 0.890625, "sample": [51.14314270019531, 16.406333923339844, 14.188129425048828, 32.649513244628906, 49.70185852050781, 4.309135437011719, 24.847015380859375, 22.973209381103516, 14.98297119140625, 1.8945121765136719, 14.267780303955078, 45.775665283203125, -18.364532470703125, 17.90656280517578, -2.5840682983398438, 21.689041137695312, 3.2019901275634766, -4.436553955078125, 55.35432434082031, -14.18817138671875, 31.149749755859375, 42.99015808105469, 20.057613372802734, 33.01353454589844, 38.33428955078125, 23.384201049804688, 21.041778564453125, 14.503059387207031, 12.854339599609375, 14.430137634277344, 26.046424865722656, 5.843982696533203, 4.197292327880859, 38.58856201171875, 50.779052734375, 6.429319381713867, 27.207351684570312, 7.1212921142578125, -7.63813591003418, 6.977022171020508, 1.189910888671875, 25.414873123168945, 7.792167663574219, 50.06648254394531, 6.90101432800293, 33.565879821777344, 27.33197784423828, 8.8935546875, 5.81126594543457, -12.627593994140625, 14.393508911132812, 31.72307586669922, 12.280509948730469, 4.868721008300781, 23.040283203125, 35.747154235839844, 44.898223876953125, 28.04338836669922, 3.35992431640625, 45.339622497558594, 25.639389038085938, 20.42901611328125, -6.071632385253906, 23.39401626586914], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000141.npy"} +{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 19.969118118286133, "std": 22.05319595336914, "min": -42.372589111328125, "p10": -2.91082649230957, "median": 17.22383403778076, "p90": 52.73888015747071, "max": 68.15338134765625, "pos_frac": 0.875, "sample": [-2.6151161193847656, 58.795745849609375, 2.7047863006591797, 9.82394027709961, 5.397380828857422, 32.53507995605469, -19.46999740600586, 5.923982620239258, -11.8570556640625, 10.628849029541016, 30.436019897460938, 33.821502685546875, 51.374122619628906, 11.353107452392578, 18.19025993347168, 39.84822082519531, 1.0579948425292969, -42.372589111328125, -3.4050750732421875, 19.505630493164062, 29.33995819091797, 20.82709503173828, 7.883808135986328, 47.210693359375, 53.32377624511719, 14.35209846496582, 19.779457092285156, 26.22041130065918, 30.06043243408203, -3.0531558990478516, 19.17041015625, 50.76123809814453, 31.31207275390625, 25.45032501220703, 0.5121192932128906, 56.99659729003906, 58.89039611816406, 16.257408142089844, 2.7511959075927734, 3.970733642578125, 1.8459320068359375, 7.145938873291016, 0.6776657104492188, 45.1683349609375, 21.294296264648438, 50.92462158203125, 4.554975509643555, -3.0375595092773438, 10.338096618652344, 12.061012268066406, 25.256000518798828, 0.402099609375, 9.12834358215332, -6.7038116455078125, 68.15338134765625, 58.26654815673828, 9.245506286621094, 54.622962951660156, 3.9746780395507812, 6.301965713500977, 32.951393127441406, 43.57183074951172, 20.654951095581055, 37.53056335449219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000142.npy"} +{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 15.336509704589844, "std": 18.686594009399414, "min": -24.52129364013672, "p10": -6.495886993408202, "median": 13.567496299743652, "p90": 37.572973632812506, "max": 77.52992248535156, "pos_frac": 0.765625, "sample": [16.862640380859375, 2.8096466064453125, 20.01862335205078, 20.990215301513672, 49.58038330078125, 11.652427673339844, -2.7193470001220703, 34.428871154785156, 42.81099319458008, 5.432134628295898, 28.5020751953125, -24.52129364013672, -7.884834289550781, 21.62896728515625, 24.010665893554688, -1.7915477752685547, -8.641702651977539, 3.489957809448242, 31.908432006835938, -6.772735595703125, 9.081764221191406, 39.82331848144531, -3.7661094665527344, 23.2249755859375, 17.3917236328125, -11.194469451904297, 20.49639892578125, 33.1949462890625, 15.542583465576172, 27.72604751586914, -5.849906921386719, -7.906667709350586, 5.901588439941406, 6.5676727294921875, 36.947731018066406, 16.6824951171875, 12.382644653320312, 13.895235061645508, 18.768585205078125, 77.52992248535156, 10.614410400390625, -1.9022903442382812, 11.362884521484375, 37.84093475341797, 0.5821609497070312, 32.57756805419922, 32.400917053222656, 40.946800231933594, -1.1579513549804688, 25.89300537109375, -2.357452392578125, 58.79095458984375, 13.239757537841797, 22.16596221923828, 22.01519012451172, 12.423261642456055, 24.398284912109375, 9.753768920898438, 30.341957092285156, -23.436752319335938, 5.571531295776367, 11.34913444519043, -2.2773361206054688, 2.1648712158203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000143.npy"} +{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 19.694934844970703, "std": 16.19609832763672, "min": -9.603065490722656, "p10": -0.5643539428710935, "median": 19.230892181396484, "p90": 39.355914306640635, "max": 72.08677673339844, "pos_frac": 0.875, "sample": [-1.392181396484375, 23.100021362304688, 10.618330001831055, 43.58427429199219, -9.603065490722656, 11.99725341796875, 28.04998207092285, 28.048019409179688, 23.41765594482422, 25.315906524658203, 36.91187286376953, 24.682384490966797, 18.67083740234375, 22.766647338867188, 28.979442596435547, 31.02092742919922, 14.83646011352539, 25.88074493408203, 40.37165069580078, 48.994789123535156, 35.602264404296875, 44.414100646972656, 3.4957637786865234, -4.846221923828125, 24.719818115234375, -0.297698974609375, 19.408958435058594, 5.243339538574219, 26.022680282592773, 3.6939945220947266, -8.360910415649414, 24.780790328979492, 29.6683292388916, 31.709373474121094, -2.4466781616210938, 31.85382080078125, 12.1932373046875, 10.120098114013672, 11.480045318603516, 18.512670516967773, 5.5839080810546875, 14.97998046875, 58.088966369628906, 24.30845069885254, 11.754539489746094, 22.92742156982422, -8.00942611694336, 11.273307800292969, 25.856491088867188, 36.985862731933594, 24.87757110595703, 48.562103271484375, 2.750396728515625, 5.8446044921875, 13.811790466308594, 2.02569580078125, 72.08677673339844, 11.38088607788086, 10.635515213012695, 14.055435180664062, 19.052825927734375, 11.920028686523438, 27.18157958984375, -0.6786346435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000144.npy"} +{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 19.97324562072754, "std": 18.227842330932617, "min": -10.279708862304688, "p10": 0.8428533554077149, "median": 14.21449089050293, "p90": 45.570151901245126, "max": 77.8551025390625, "pos_frac": 0.90625, "sample": [11.736526489257812, 4.117156982421875, 21.306381225585938, 77.8551025390625, 30.242923736572266, 51.75091552734375, 38.84130859375, 12.827312469482422, -10.279708862304688, 21.268596649169922, 4.748199462890625, 9.746795654296875, 30.309860229492188, 7.141864776611328, 19.656543731689453, 22.11675262451172, 41.673789978027344, 12.451013565063477, 14.00802993774414, -1.9667720794677734, 0.8084621429443359, 9.885902404785156, 9.708511352539062, 61.42003631591797, 14.759597778320312, 2.7040023803710938, 10.196632385253906, 0.9230995178222656, 11.445465087890625, 43.25555419921875, 13.984277725219727, 14.361602783203125, 32.954986572265625, -4.572395324707031, -0.1134490966796875, 31.137786865234375, 4.639556884765625, 4.988447189331055, 20.949623107910156, -3.0992050170898438, 18.153247833251953, 14.067378997802734, 50.28887939453125, 1.9002723693847656, 35.732276916503906, 57.28694152832031, 21.039161682128906, -5.468746185302734, 24.03913116455078, 48.414527893066406, 5.384807586669922, 13.442449569702148, 37.578826904296875, 33.48576354980469, 24.05782699584961, 3.138570785522461, 9.068073272705078, 16.562824249267578, 12.6495361328125, 31.50177764892578, 46.5621223449707, 33.63260269165039, 3.298490524291992, 42.57989501953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000145.npy"} +{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 16.486042022705078, "std": 17.46185874938965, "min": -13.014801025390625, "p10": -3.8581787109375, "median": 14.199779510498047, "p90": 40.97269744873047, "max": 68.16824340820312, "pos_frac": 0.8125, "sample": [17.223129272460938, 8.700721740722656, 48.31214904785156, -10.572547912597656, 11.325847625732422, 0.422332763671875, 37.23325729370117, 13.925888061523438, 20.528076171875, 13.233993530273438, 43.12376403808594, 8.677864074707031, 6.535037994384766, 21.830360412597656, 18.14215850830078, 18.077369689941406, 20.368804931640625, 1.6453323364257812, 4.049201965332031, 0.38187217712402344, -0.198028564453125, 12.748603820800781, 27.542938232421875, 46.358306884765625, -6.6781463623046875, 21.006759643554688, 23.63055419921875, -13.014801025390625, 13.97650146484375, 31.310791015625, -3.1391525268554688, 13.540328979492188, 9.51649284362793, 22.578018188476562, 68.16824340820312, -7.417869567871094, 16.67359161376953, 7.417211532592773, 21.58893585205078, 41.006439208984375, 15.77318000793457, 40.89396667480469, 56.244075775146484, 15.203800201416016, 18.873023986816406, 14.423057556152344, 39.18010711669922, -11.290130615234375, 7.6788330078125, -1.7824554443359375, 32.183380126953125, 33.68006134033203, 24.61359405517578, 5.197771072387695, 3.6429824829101562, 4.7203826904296875, 8.746282577514648, -3.8776702880859375, 27.9747314453125, -1.8303375244140625, 39.12068176269531, -4.087892532348633, 43.85768127441406, -3.8126983642578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000146.npy"} +{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 20.196962356567383, "std": 18.97796630859375, "min": -12.436393737792969, "p10": 0.19122486114502024, "median": 15.527737617492676, "p90": 45.74578704833985, "max": 72.00064086914062, "pos_frac": 0.890625, "sample": [61.0794677734375, -11.696033477783203, 24.23553466796875, 40.253326416015625, 10.680023193359375, 6.030391693115234, 24.61065673828125, 42.79480743408203, 11.18463134765625, 2.1943588256835938, 5.893123626708984, 16.6268310546875, 16.676095962524414, 25.794898986816406, 19.530155181884766, 59.833763122558594, 31.508182525634766, 15.638647079467773, 5.695220947265625, 38.19615936279297, 53.00959396362305, 6.3210296630859375, 31.361583709716797, 7.473894119262695, 72.00064086914062, 10.528244018554688, 56.19264221191406, 33.3751106262207, 14.297599792480469, 30.16997528076172, 13.175312042236328, -0.49514007568359375, 0.9139156341552734, 45.05389404296875, 24.34368133544922, 1.397918701171875, 22.925251007080078, 70.9691162109375, 13.410711288452148, 9.632938385009766, 27.179115295410156, 7.14483642578125, 3.6775131225585938, 7.6988372802734375, 12.261825561523438, 24.960548400878906, 5.4676361083984375, 16.728260040283203, 29.589370727539062, -1.3377151489257812, -0.8939361572265625, 17.891895294189453, 15.416828155517578, 24.84344482421875, -0.118499755859375, 11.79891586303711, 46.04231262207031, 15.148796081542969, 18.891754150390625, -9.112884521484375, 42.062744140625, -12.436393737792969, 14.600082397460938, 12.2821044921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000147.npy"} +{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 16.166454315185547, "std": 16.389705657958984, "min": -15.479751586914062, "p10": 0.42838096618652377, "median": 11.965677261352539, "p90": 40.41294326782227, "max": 55.543212890625, "pos_frac": 0.90625, "sample": [0.7312583923339844, 16.966552734375, 2.72265625, 46.824005126953125, 21.12390899658203, 15.466133117675781, 37.491798400878906, 13.679401397705078, -3.8210525512695312, 54.754554748535156, 11.415626525878906, 36.76849365234375, 15.470394134521484, 23.96526336669922, 31.84325408935547, 40.88789367675781, 4.771402359008789, -4.382049560546875, 0.29857635498046875, -4.4742279052734375, 31.0682373046875, 23.776290893554688, 4.579202651977539, 22.123992919921875, 41.857730865478516, 13.927703857421875, 21.45368194580078, 8.368721008300781, 48.8790168762207, -2.3347034454345703, 39.104087829589844, 6.932861328125, 8.748617172241211, 13.918853759765625, 1.3931236267089844, 9.303583145141602, 2.0929107666015625, 14.149993896484375, 27.467788696289062, 6.661975860595703, 39.304725646972656, 11.134490966796875, -10.585418701171875, 4.894012451171875, 10.182062149047852, 8.413116455078125, 20.102203369140625, 5.283149719238281, 15.725807189941406, 2.5824050903320312, -15.479751586914062, 13.306648254394531, 14.502523422241211, 7.816801071166992, 12.515727996826172, 3.326211929321289, 10.503799438476562, 53.9781494140625, 55.543212890625, 3.701976776123047, 6.374021530151367, 9.492729187011719, 37.32911682128906, 8.727855682373047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000148.npy"} +{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 20.686452865600586, "std": 16.941299438476562, "min": -9.9119873046875, "p10": 0.33076858520507846, "median": 20.37682342529297, "p90": 41.4132667541504, "max": 68.23854064941406, "pos_frac": 0.90625, "sample": [28.269813537597656, 21.875473022460938, 54.19309997558594, 12.066181182861328, 28.725860595703125, 9.352859497070312, 10.275625228881836, -1.3923530578613281, -2.1323013305664062, 22.599048614501953, 7.606607437133789, 11.921894073486328, 24.25967788696289, 23.086055755615234, 5.302845001220703, -8.149375915527344, 47.16265869140625, 25.299560546875, 4.8352203369140625, 26.89435577392578, 12.300384521484375, 39.37914276123047, 35.51850891113281, 7.406393051147461, 8.980081558227539, 24.549896240234375, 11.276453018188477, 36.05241394042969, 33.0504035949707, 34.02122497558594, 36.337890625, 0.20153045654296875, 68.23854064941406, 6.59416389465332, 35.847496032714844, 18.561561584472656, 56.675750732421875, 10.016502380371094, 2.924121856689453, 30.898521423339844, 15.454700469970703, 35.01264572143555, 16.483285903930664, 23.735979080200195, 0.63232421875, 30.086729049682617, 12.176822662353516, 32.65692138671875, 10.177703857421875, -0.2676239013671875, 20.884765625, 23.140037536621094, -1.7550048828125, 44.534461975097656, 13.290586471557617, 19.868881225585938, 5.628772735595703, 23.35576820373535, 42.2850341796875, 64.464599609375, 5.131965637207031, 8.42905044555664, 27.552837371826172, -9.9119873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000149.npy"} +{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 19.51645278930664, "std": 19.830421447753906, "min": -19.64849090576172, "p10": 0.19786605834961019, "median": 15.738443374633789, "p90": 41.80986480712892, "max": 83.6280517578125, "pos_frac": 0.890625, "sample": [54.356597900390625, 72.18821716308594, 23.20549774169922, 7.80645751953125, 29.021873474121094, 1.0222320556640625, 22.41490936279297, 17.06012535095215, -19.64849090576172, 4.87451171875, 83.6280517578125, -3.3860034942626953, 18.762088775634766, 12.603771209716797, 7.147876739501953, 31.627883911132812, 12.114517211914062, 25.5294132232666, 20.300506591796875, 19.953475952148438, 15.62820816040039, 34.19017791748047, 61.148704528808594, 13.755050659179688, 35.67250061035156, 23.65575408935547, 42.92877197265625, 32.01708221435547, 11.385522842407227, 33.58829116821289, 4.82659912109375, -11.52197265625, 23.061683654785156, 36.755462646484375, 3.104339599609375, 3.681417465209961, 43.716278076171875, 12.425849914550781, 26.140823364257812, 3.7209625244140625, 25.464889526367188, 18.159751892089844, 3.2785682678222656, -0.15543365478515625, 39.19908142089844, -2.8770217895507812, 7.77252197265625, 21.586599349975586, 13.463783264160156, 11.678924560546875, 10.17791748046875, 13.713886260986328, 22.870880126953125, 2.5167770385742188, 30.19757652282715, 76.07391357421875, 13.28957748413086, 5.019260406494141, -4.320762634277344, 30.50183868408203, 15.848678588867188, 2.4266815185546875, -6.4892120361328125, 9.189277648925781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000150.npy"} +{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 21.386999130249023, "std": 18.111618041992188, "min": -13.894441604614258, "p10": 1.0023443222045907, "median": 17.430262565612793, "p90": 44.83815078735352, "max": 72.49441528320312, "pos_frac": 0.90625, "sample": [-8.92413330078125, 72.49441528320312, 27.967117309570312, 8.735389709472656, 2.48541259765625, 5.467498779296875, 25.919700622558594, 26.55435562133789, 24.402603149414062, 14.474761962890625, 45.239044189453125, -0.7842922210693359, -3.2748489379882812, 32.321022033691406, 12.31744384765625, 2.7325286865234375, 13.377132415771484, 15.029399871826172, 16.997665405273438, 20.386703491210938, 67.28134155273438, 32.60774612426758, 20.92919921875, 16.658740997314453, -6.250328063964844, 56.30420684814453, 13.479026794433594, 15.385955810546875, -1.6952590942382812, 19.832427978515625, 21.62196922302246, 12.03996467590332, 11.009403228759766, 14.478841781616211, 9.086563110351562, 53.034942626953125, -13.894441604614258, 41.35797119140625, 15.608757019042969, 24.0875244140625, 6.806257247924805, 24.651412963867188, 17.496124267578125, 53.56281280517578, 19.634017944335938, 41.34745788574219, 38.551971435546875, 5.682764053344727, 34.87669372558594, 31.813446044921875, 23.077438354492188, 8.7288818359375, 4.410472869873047, 40.435707092285156, 14.997306823730469, 1.9045238494873047, 50.992515563964844, 43.902732849121094, 36.73309326171875, 12.681884765625, 0.6156959533691406, 21.097501754760742, 17.36440086364746, 40.519309997558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000151.npy"} +{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 22.185049057006836, "std": 19.241355895996094, "min": -18.010498046875, "p10": 2.849803924560547, "median": 17.72074317932129, "p90": 50.856134033203126, "max": 80.96493530273438, "pos_frac": 0.953125, "sample": [19.620132446289062, 9.345932006835938, 42.68840026855469, 50.39814758300781, 17.669940948486328, 36.303871154785156, 25.60085678100586, 24.25885009765625, 63.68925476074219, 15.178291320800781, 41.358787536621094, 36.67600631713867, 11.853439331054688, 11.119331359863281, 25.345863342285156, 60.012664794921875, 29.977638244628906, 9.696544647216797, 22.049198150634766, 8.600990295410156, 29.166664123535156, 12.843421936035156, -11.442331314086914, 16.939430236816406, 9.356147766113281, 65.42756652832031, 29.126739501953125, 54.2864990234375, 17.77154541015625, 2.9757080078125, 18.715484619140625, -18.010498046875, 26.311458587646484, 2.7958450317382812, 4.596408843994141, 23.933202743530273, 16.95465087890625, 29.266876220703125, 16.189247131347656, 9.028839111328125, 80.96493530273438, 9.369583129882812, 18.5029296875, 16.944580078125, 15.243854522705078, 5.32781982421875, 65.36955261230469, 9.750837326049805, 0.3111610412597656, 30.009552001953125, 19.930131912231445, 14.129936218261719, 7.811126708984375, 6.984382629394531, 51.05241394042969, 0.034488677978515625, 27.09967041015625, -4.3377685546875, 12.71868896484375, 38.204933166503906, 9.1651611328125, 2.397979736328125, 32.59539794921875, 32.58476257324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000152.npy"} +{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 18.822778701782227, "std": 19.090059280395508, "min": -16.6564998626709, "p10": -0.5571786880493163, "median": 16.879642486572266, "p90": 44.42392311096193, "max": 78.7630615234375, "pos_frac": 0.875, "sample": [30.52532386779785, 18.591873168945312, 10.178192138671875, 3.3981857299804688, 7.353588104248047, 27.837799072265625, 2.769521713256836, 27.1343994140625, 3.1211700439453125, 68.54142761230469, 23.4266357421875, 23.163597106933594, 3.4764976501464844, 25.418933868408203, 48.39332580566406, -12.26513671875, 35.83148193359375, 4.18235969543457, 20.731735229492188, 38.3408203125, 22.3990478515625, 78.7630615234375, 13.127738952636719, 24.36564826965332, 18.671340942382812, 58.24431610107422, 3.5827178955078125, 5.797702789306641, 49.00077819824219, 22.864532470703125, 46.26933670043945, 4.865104675292969, 34.10646057128906, 3.305753707885742, 0.4634361267089844, 2.7691192626953125, 15.167411804199219, 24.21489715576172, 1.7287979125976562, -4.204925537109375, 9.69342041015625, 11.265914916992188, 4.1464385986328125, 34.03131103515625, 7.733386993408203, -0.3629112243652344, -16.6564998626709, 14.409088134765625, 28.992034912109375, -4.1346893310546875, 28.843643188476562, 52.71052551269531, 29.30889129638672, -2.7121410369873047, 26.010339736938477, -0.6404361724853516, 26.8758544921875, 12.712032318115234, 23.573951721191406, 40.117958068847656, 3.495159149169922, -3.8961715698242188, 5.201011657714844, 38.2857551574707], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000153.npy"} +{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 18.761655807495117, "std": 19.334016799926758, "min": -19.064937591552734, "p10": -6.254926300048828, "median": 18.218921661376953, "p90": 39.85145568847656, "max": 66.89236450195312, "pos_frac": 0.828125, "sample": [2.320425033569336, 1.6690216064453125, -17.302459716796875, 0.017217636108398438, 66.89236450195312, 23.77165985107422, 7.106340408325195, 18.274063110351562, 37.124786376953125, -6.639533996582031, 11.709144592285156, 55.54548645019531, -9.207595825195312, 20.383773803710938, 29.036540985107422, 23.77981185913086, -1.608469009399414, -8.337417602539062, 15.619474411010742, 0.8951740264892578, 29.132301330566406, 29.788654327392578, 1.0852794647216797, 7.571804046630859, 7.9055023193359375, 36.59950256347656, 18.163780212402344, 37.576385498046875, 32.624778747558594, -6.4894866943359375, 8.279243469238281, 49.780006408691406, -3.9092636108398438, 40.1607666015625, -9.076993942260742, 39.129730224609375, 59.2406005859375, 50.8597412109375, 7.9613037109375, 38.3704833984375, 35.63874053955078, 30.3193359375, 6.286766052246094, 23.527847290039062, 31.412002563476562, 14.12984848022461, 15.7589111328125, 21.47101402282715, -0.8032608032226562, 27.8863525390625, 30.86016845703125, 18.955093383789062, -5.707618713378906, 13.72314453125, 16.654525756835938, -19.064937591552734, 32.7211799621582, 28.066452026367188, 25.529403686523438, 9.227005004882812, 9.596221923828125, 7.592218399047852, 21.66387939453125, 59.497711181640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000154.npy"} +{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 19.738880157470703, "std": 17.74778938293457, "min": -24.239852905273438, "p10": 0.862902641296387, "median": 17.449012756347656, "p90": 36.44425735473633, "max": 73.8837661743164, "pos_frac": 0.921875, "sample": [11.523017883300781, 7.892555236816406, 3.186433792114258, -2.0841522216796875, 24.050342559814453, 1.4467430114746094, 33.48439407348633, 0.714080810546875, 16.647876739501953, 0.11250114440917969, 3.5713272094726562, 11.135997772216797, 14.483131408691406, 9.4658203125, 20.25176239013672, 25.51762580871582, 30.990528106689453, 28.485862731933594, 25.82483673095703, 14.162612915039062, 28.572288513183594, 15.761077880859375, 25.73107147216797, 33.41398620605469, 61.93503189086914, 6.355264663696289, 33.937477111816406, 34.551673889160156, 12.289432525634766, 19.63079833984375, 6.899335861206055, 1.8576431274414062, 17.382205963134766, 23.44329833984375, 47.833526611328125, -24.239852905273438, 55.63807678222656, 8.175270080566406, 26.407188415527344, 5.3546295166015625, 9.53370475769043, 32.0196533203125, 36.7635498046875, 14.4842529296875, 73.8837661743164, 20.56487274169922, 65.50906372070312, 30.61968994140625, -12.111526489257812, 14.592803955078125, 13.011398315429688, 35.699241638183594, 17.515819549560547, 30.85387420654297, 19.28342056274414, -5.804100036621094, 30.71774673461914, 38.03009796142578, 21.09654998779297, 1.210153579711914, 34.2202033996582, 8.405059814453125, -1.4334907531738281, 12.829856872558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000155.npy"} +{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 15.239997863769531, "std": 20.016712188720703, "min": -37.64312744140625, "p10": -7.2251132965087885, "median": 13.131226539611816, "p90": 43.13808174133302, "max": 64.48558044433594, "pos_frac": 0.796875, "sample": [35.74999237060547, 13.351806640625, 45.88493347167969, 47.998779296875, 29.900665283203125, 12.303878784179688, -18.384811401367188, 1.1488265991210938, 9.29510498046875, 11.30328369140625, -12.946884155273438, 46.23855972290039, 10.875114440917969, -0.9014663696289062, -6.369091033935547, -24.78331756591797, 24.612377166748047, 40.440025329589844, 48.965606689453125, 16.920379638671875, 9.4453125, 39.13948059082031, 1.98095703125, 27.18840789794922, 10.339759826660156, 3.3036117553710938, 5.229059219360352, 17.70044708251953, 64.48558044433594, 9.840309143066406, 53.53910827636719, 37.45586395263672, 44.29439163208008, 16.0343017578125, -7.59197998046875, 0.7997570037841797, 11.451026916503906, -8.033218383789062, -37.64312744140625, -2.5310134887695312, 21.57654571533203, -1.3680648803710938, 4.38177490234375, 26.424896240234375, 22.832229614257812, 5.6504364013671875, 21.699838638305664, 40.180564880371094, 15.074462890625, 12.910646438598633, 0.11267852783203125, 20.383033752441406, 12.429237365722656, 24.082231521606445, -24.98992156982422, -2.8286590576171875, 30.08795166015625, 27.77252960205078, 20.85950469970703, 12.730117797851562, 13.396976470947266, 14.935798645019531, -4.183727264404297, 33.17695236206055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000156.npy"} +{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 21.57200050354004, "std": 19.284809112548828, "min": -9.315010070800781, "p10": -0.7855901718139645, "median": 18.865703582763672, "p90": 45.364188766479494, "max": 76.09053039550781, "pos_frac": 0.875, "sample": [37.70735168457031, 34.64685821533203, 24.817787170410156, 46.33726501464844, 17.048316955566406, 3.3926048278808594, 11.470443725585938, 45.84323501586914, 8.39678955078125, 42.48442077636719, 22.95870590209961, 44.24641418457031, 17.214658737182617, 31.019668579101562, 33.26971435546875, 27.260894775390625, 31.20885467529297, 21.606443405151367, 26.349952697753906, 27.69768524169922, 20.36486053466797, -3.1743927001953125, 31.776634216308594, -0.9278717041015625, 32.040374755859375, 24.999801635742188, -0.9478073120117188, 17.366546630859375, 6.871429443359375, 49.044349670410156, -6.12347412109375, 12.408445358276367, 13.527114868164062, 1.8458728790283203, 7.384033203125, 29.21766471862793, 38.020416259765625, 10.53399658203125, 33.91529846191406, -0.4535999298095703, -1.9526710510253906, 64.55702209472656, 7.750143051147461, 6.966789245605469, 2.2879676818847656, 62.18735122680664, 37.23704528808594, -6.59412956237793, 71.06258392333984, 8.260677337646484, 36.44751739501953, 5.9443511962890625, 1.4263496398925781, 15.525993347167969, 36.47038269042969, 28.55010986328125, 76.09053039550781, 25.020233154296875, 12.645286560058594, -9.315010070800781, 17.13922119140625, 5.855751037597656, 0.677734375, 3.6990585327148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000157.npy"} +{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 21.917728424072266, "std": 21.43829917907715, "min": -3.793283462524414, "p10": -0.5671649932861323, "median": 17.011685371398926, "p90": 47.815019226074234, "max": 77.486328125, "pos_frac": 0.875, "sample": [7.046031951904297, 1.0474433898925781, 18.72382164001465, 15.800445556640625, 18.222925186157227, 12.726930618286133, 5.333473205566406, 4.127265930175781, 24.82436752319336, 26.216144561767578, 41.33935546875, -2.216217041015625, 43.1796875, 34.563297271728516, -0.758392333984375, -3.3309288024902344, 1.1276626586914062, 33.59893035888672, 6.941440582275391, 3.799407958984375, 71.025390625, -0.12096786499023438, 38.02294921875, 8.070648193359375, 49.80158996582031, 5.8312225341796875, 37.60417938232422, 9.161510467529297, 6.96160888671875, 3.824779510498047, 33.900447845458984, 29.01848602294922, 5.759956359863281, 37.8873291015625, -0.9044532775878906, -1.7020263671875, 57.09925842285156, 37.0504150390625, 1.9552383422851562, 71.93861389160156, 72.90760803222656, 7.335540771484375, 9.069622039794922, 77.486328125, 2.7817764282226562, 22.496841430664062, 0.5932044982910156, 27.149383544921875, 9.872360229492188, 12.453926086425781, 76.30975341796875, 40.148704528808594, 31.171409606933594, 32.07734680175781, -1.630777359008789, 37.37318420410156, 13.598312377929688, -3.793283462524414, 24.975631713867188, 27.820152282714844, 32.826202392578125, 25.509536743164062, 25.3414306640625, 4.361167907714844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000158.npy"} +{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 19.63745880126953, "std": 20.59627342224121, "min": -10.096832275390625, "p10": -5.359132575988769, "median": 16.219131469726562, "p90": 42.122381973266606, "max": 87.7291030883789, "pos_frac": 0.859375, "sample": [10.619674682617188, 8.324165344238281, 40.12809753417969, -7.698600769042969, 26.612075805664062, 31.167327880859375, -9.834419250488281, -9.519214630126953, 36.90315246582031, 22.17352294921875, 18.996856689453125, 5.477333068847656, 7.316944122314453, 0.44384765625, 16.19640350341797, 13.946273803710938, 25.015594482421875, 33.319759368896484, 47.92442321777344, 40.353885650634766, 87.7291030883789, 4.317583084106445, -10.096832275390625, 4.09027099609375, 2.9163150787353516, -0.8993377685546875, 10.525962829589844, -5.011077880859375, 9.09222412109375, 16.8623046875, 8.021148681640625, 41.81035614013672, -7.061470031738281, 12.113285064697266, 10.35723876953125, -5.508298873901367, 22.925891876220703, 15.424644470214844, 7.7399444580078125, 57.502105712890625, 38.70245361328125, 41.38804626464844, 18.46219825744629, 25.60547637939453, 9.020164489746094, 75.35848999023438, 6.400510787963867, 0.2193756103515625, 3.297882080078125, 41.35125732421875, 44.68316650390625, 16.241859436035156, 18.107070922851562, -8.107818603515625, 21.533477783203125, 32.87591552734375, 36.82384490966797, 42.256107330322266, 12.171211242675781, 62.15309143066406, 32.222267150878906, 18.108272552490234, 2.3001556396484375, 24.904434204101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000159.npy"} +{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 19.808996200561523, "std": 22.115102767944336, "min": -16.774818420410156, "p10": -4.5050712585449215, "median": 15.291259765625, "p90": 48.769586944580084, "max": 89.05693054199219, "pos_frac": 0.828125, "sample": [29.175262451171875, 8.954660415649414, 41.27311706542969, 25.877716064453125, 2.3206214904785156, -6.844490051269531, 13.176536560058594, 33.04322814941406, 40.483367919921875, -4.784505844116211, -16.774818420410156, 7.9530487060546875, 47.34766387939453, 51.02055358886719, 32.3309326171875, -2.2445068359375, 1.2211761474609375, 19.17005157470703, 5.0418548583984375, 49.37898254394531, 68.53126525878906, -5.9022064208984375, 31.064468383789062, -4.540824890136719, 24.509803771972656, 4.5968475341796875, 15.908340454101562, 10.815078735351562, 83.64114379882812, 24.587921142578125, 6.978565216064453, 20.794294357299805, 14.913299560546875, 51.34388732910156, 2.5916671752929688, -4.4216461181640625, 4.379402160644531, 38.510345458984375, 64.33984375, 89.05693054199219, 25.23598861694336, 25.625869750976562, -6.0334320068359375, 44.685142517089844, 30.525604248046875, 5.9208221435546875, 1.0087528228759766, 14.189796447753906, 4.9766082763671875, 24.496437072753906, 14.155281066894531, 16.014556884765625, 15.669219970703125, -4.052295684814453, 6.051300048828125, 26.919029235839844, 3.2236461639404297, -0.5265827178955078, -6.687583923339844, 31.248199462890625, 1.7874069213867188, 38.483726501464844, 13.369308471679688, 22.67005157470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000160.npy"} +{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 20.026674270629883, "std": 19.912761688232422, "min": -12.71551513671875, "p10": -5.371806907653808, "median": 17.887900352478027, "p90": 45.76089935302735, "max": 83.07313537597656, "pos_frac": 0.84375, "sample": [25.281509399414062, 46.69500732421875, -3.936859130859375, 59.01018524169922, 43.88462829589844, -6.671638488769531, -12.71551513671875, -5.95574951171875, 24.890792846679688, -5.45756721496582, 13.669204711914062, 40.283111572265625, 25.5118408203125, -7.64067268371582, 6.3605499267578125, 11.935897827148438, 44.25572204589844, 22.67740821838379, 27.332138061523438, 57.08326721191406, 18.14442253112793, 14.83224105834961, 21.77322006225586, -5.171699523925781, 16.63500213623047, 2.807056427001953, 24.123573303222656, 5.987342834472656, 63.307411193847656, 83.07313537597656, 25.576919555664062, 10.611936569213867, 14.176132202148438, -1.8030662536621094, 39.004150390625, 30.043479919433594, 0.075775146484375, -6.050222396850586, 6.043052673339844, 24.188278198242188, 24.039840698242188, 9.681663513183594, -10.5760498046875, 17.631378173828125, 29.232894897460938, 22.31696319580078, 47.48332214355469, 39.067665100097656, 5.568666458129883, 15.007186889648438, 35.23094177246094, 3.00445556640625, 0.8494720458984375, 46.405975341796875, 7.546012878417969, 8.378021240234375, 12.582664489746094, 27.614280700683594, 41.1926383972168, 0.41518211364746094, 9.491674423217773, 34.693763732910156, 34.37348556518555, 26.60370635986328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000161.npy"} +{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 19.227447509765625, "std": 20.753376007080078, "min": -13.53802490234375, "p10": -1.3936769485473621, "median": 15.647308349609375, "p90": 41.681396102905275, "max": 114.13113403320312, "pos_frac": 0.875, "sample": [-5.331031799316406, 13.886913299560547, 19.894256591796875, 27.435882568359375, 0.4426422119140625, 49.0672607421875, 2.334867477416992, 37.295196533203125, 11.884883880615234, -1.9401683807373047, -4.6692352294921875, 40.76845932006836, 12.759101867675781, -11.060417175292969, -2.8785858154296875, 10.02762222290039, 34.352294921875, 23.701637268066406, 12.94708251953125, 24.692283630371094, 33.07232666015625, 9.510513305664062, 33.742637634277344, 4.58856201171875, 0.025541305541992188, 29.187103271484375, 20.224205017089844, 21.913986206054688, 21.777175903320312, 25.835830688476562, 34.705177307128906, -10.337677001953125, -0.1185302734375, 11.954887390136719, 36.65961456298828, 21.020076751708984, 42.072654724121094, 114.13113403320312, 25.24053955078125, 16.50440216064453, 14.790214538574219, 21.403640747070312, 28.427322387695312, 45.04624938964844, 25.858150482177734, 4.0194091796875, 0.709320068359375, 9.644584655761719, 23.300430297851562, 3.7340087890625, 4.477264404296875, 56.20269012451172, 7.8127288818359375, 2.1597213745117188, 68.36724853515625, 27.497215270996094, -13.53802490234375, 2.18914794921875, 6.246299743652344, 30.04180908203125, 13.414642333984375, 1.2094497680664062, 14.47671890258789, 45.74528503417969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000162.npy"} +{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 18.588184356689453, "std": 16.928220748901367, "min": -15.1075439453125, "p10": 0.4598331451416018, "median": 17.38038921356201, "p90": 39.42785491943359, "max": 62.53645324707031, "pos_frac": 0.90625, "sample": [6.927337646484375, 5.096281051635742, 53.51115417480469, 21.9515380859375, -5.794218063354492, -5.722625732421875, 10.565399169921875, 2.4973373413085938, 2.9620819091796875, 24.788551330566406, 16.699615478515625, 9.905593872070312, 23.649303436279297, 61.19044494628906, 17.561674118041992, 37.95044708251953, 26.66124725341797, 50.89898681640625, 4.292724609375, 23.72936248779297, 12.276313781738281, 24.899703979492188, 18.8223876953125, 21.010276794433594, 19.87346649169922, 3.8104076385498047, 36.953758239746094, -12.073554992675781, 9.246406555175781, 17.19910430908203, 21.275283813476562, 5.743236541748047, 21.5406494140625, 11.062171936035156, 23.47592544555664, -3.6771373748779297, -7.5429840087890625, 52.97901916503906, 39.05207061767578, 6.383522033691406, 11.95806884765625, 9.020912170410156, 23.352455139160156, 33.38726806640625, 22.096189498901367, 26.759414672851562, 7.734855651855469, 62.53645324707031, 26.212112426757812, 0.6746044158935547, 10.478008270263672, 16.728172302246094, 15.843460083007812, 39.588905334472656, 28.491676330566406, 9.142276763916016, 28.23335075378418, 13.353221893310547, -15.1075439453125, 2.7180538177490234, 45.507652282714844, 22.020233154296875, 36.913970947265625, 0.36778831481933594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000163.npy"} +{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 20.988468170166016, "std": 15.990802764892578, "min": -12.768966674804688, "p10": 2.798645782470704, "median": 19.882247924804688, "p90": 37.23454055786133, "max": 73.93821716308594, "pos_frac": 0.9375, "sample": [-5.667625427246094, 31.321044921875, 31.877662658691406, 43.42571258544922, 30.534019470214844, 24.958351135253906, 19.23267364501953, 43.607269287109375, 21.771305084228516, 29.842575073242188, 37.05982971191406, 7.477085113525391, -2.3643016815185547, 28.14226531982422, 19.914443969726562, 10.393814086914062, 23.926918029785156, 28.741134643554688, 2.181974411010742, 7.372438430786133, -12.768966674804688, 22.646156311035156, 23.76055908203125, 21.196075439453125, 20.285280227661133, 14.355850219726562, 2.4225997924804688, 10.877029418945312, 19.11853790283203, 8.87407112121582, 12.948280334472656, 5.01605224609375, 25.401702880859375, 35.083946228027344, 19.850051879882812, 31.946701049804688, 13.846214294433594, 37.276519775390625, 26.099130630493164, 36.96943664550781, 15.467079162597656, 16.983989715576172, 9.393226623535156, 23.623046875, 13.836469650268555, 10.493576049804688, 8.50701904296875, 3.67608642578125, 5.2883758544921875, 37.13658905029297, 25.35466766357422, 17.132034301757812, 59.07159423828125, 17.3348388671875, 58.018890380859375, 49.685035705566406, 1.1511383056640625, 23.94097900390625, 34.30610656738281, -4.521415710449219, 18.51987075805664, 12.183425903320312, 73.93821716308594, 3.7874298095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000164.npy"} +{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 14.148780822753906, "std": 14.90626049041748, "min": -23.65552520751953, "p10": -1.7030897140502927, "median": 10.004853248596191, "p90": 35.17744750976563, "max": 49.629119873046875, "pos_frac": 0.84375, "sample": [13.099433898925781, 22.508386611938477, 2.1609268188476562, 7.343254089355469, 5.624359130859375, 1.9485340118408203, 17.236282348632812, 5.4837188720703125, 6.3332061767578125, -0.012603759765625, 1.8339385986328125, -23.65552520751953, 17.093338012695312, 14.30453872680664, 8.4237060546875, -4.90655517578125, -3.096698760986328, 15.027175903320312, 32.114585876464844, 34.517822265625, 25.60379409790039, 8.120513916015625, 10.41448974609375, 45.634429931640625, 8.010551452636719, 35.97505187988281, 9.628644943237305, -6.8137664794921875, 9.203079223632812, 26.956497192382812, 23.498252868652344, 9.551416397094727, 35.46014404296875, 28.80048370361328, 4.999664306640625, -3.1103134155273438, 15.100349426269531, -2.5709457397460938, 12.624153137207031, 2.7441978454589844, 5.715063095092773, -1.574014663696289, 16.566909790039062, 22.255605697631836, 45.3978271484375, 8.118396759033203, 29.85006332397461, 26.438308715820312, 15.394096374511719, 0.02845001220703125, -0.3876953125, 19.315582275390625, 11.65340805053711, -1.7584075927734375, 10.381061553955078, 32.8297119140625, 9.229949951171875, 44.77598571777344, 43.150489807128906, 49.629119873046875, 1.3335151672363281, 8.085746765136719, 5.4931182861328125, 30.38720703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000165.npy"} +{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 23.267839431762695, "std": 23.729537963867188, "min": -9.22419548034668, "p10": 1.312217330932618, "median": 17.488187789916992, "p90": 60.76835327148438, "max": 111.61956787109375, "pos_frac": 0.90625, "sample": [71.93453979492188, 47.41914367675781, -1.0319347381591797, 76.0985107421875, 0.9863433837890625, 21.842029571533203, 7.381366729736328, 13.744293212890625, 59.11027526855469, -3.03009033203125, 26.84320068359375, 8.718854904174805, 3.1556529998779297, 30.065940856933594, 2.8353118896484375, 13.671689987182617, 21.39752197265625, 15.880401611328125, 37.05353927612305, 23.65113067626953, 18.897192001342773, 111.61956787109375, 3.074207305908203, 3.4822044372558594, -9.22419548034668, -5.923311233520508, 10.938468933105469, 16.362770080566406, 8.0517578125, 42.20611572265625, 61.47895812988281, 70.91293334960938, 20.508771896362305, 43.3389778137207, 9.915534973144531, 75.36898803710938, 11.476016998291016, 19.166786193847656, 29.464187622070312, 25.356857299804688, 10.950504302978516, 19.71530532836914, 6.118385314941406, 20.429588317871094, 50.51677703857422, 16.39560317993164, 66.38595581054688, 15.049110412597656, 19.852807998657227, 32.54559326171875, 12.778266906738281, 34.304718017578125, -3.7387542724609375, 3.9210205078125, 2.072589874267578, -3.9463539123535156, 30.95294952392578, 8.183670043945312, 18.580772399902344, 7.5385589599609375, 35.981815338134766, 10.950754165649414, 20.920143127441406, 8.481437683105469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000166.npy"} +{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 24.126216888427734, "std": 19.15192985534668, "min": -29.309402465820312, "p10": 4.420072555541992, "median": 24.386606216430664, "p90": 48.97690391540528, "max": 68.26449584960938, "pos_frac": 0.921875, "sample": [40.93754577636719, 20.558795928955078, 35.98396301269531, 34.196563720703125, 40.56683349609375, 49.45211410522461, 8.041656494140625, 16.92511749267578, 35.49976348876953, 27.376468658447266, 59.350830078125, 47.47997283935547, 25.39580535888672, -0.633026123046875, 8.0479736328125, 13.187944412231445, 29.314163208007812, 5.57745361328125, 31.59581756591797, 36.215850830078125, 12.533807754516602, 11.815643310546875, 15.122589111328125, 16.5506591796875, 25.23663330078125, -29.309402465820312, 68.26449584960938, 38.99378204345703, 0.0420074462890625, 50.938140869140625, -1.142852783203125, 8.682098388671875, 44.74013900756836, 6.788475036621094, 53.39068603515625, 30.18171501159668, 47.868080139160156, 28.411630630493164, 36.73088073730469, -12.249641418457031, 23.192718505859375, 45.734046936035156, 57.84123229980469, 23.839073181152344, 63.421661376953125, 7.212009429931641, 24.616771697998047, 10.301559448242188, 31.704025268554688, 33.665809631347656, 31.202255249023438, 13.714752197265625, 25.496089935302734, 10.036216735839844, 4.686567306518555, 39.378936767578125, 24.15644073486328, 14.582155227661133, 5.4399566650390625, -4.102073669433594, 18.131061553955078, 4.30586051940918, 10.8775634765625, 5.9820709228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000167.npy"} +{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 19.041858673095703, "std": 15.39984130859375, "min": -10.596649169921875, "p10": 0.22038402557373055, "median": 15.394889831542969, "p90": 38.374985504150395, "max": 67.79399871826172, "pos_frac": 0.921875, "sample": [11.197067260742188, 12.3328857421875, 11.118095397949219, 6.600429534912109, 0.08102607727050781, 15.094263076782227, 27.81145477294922, 8.300237655639648, 10.636219024658203, -2.9877471923828125, -8.368820190429688, 24.33294677734375, 40.45603942871094, 38.48059844970703, 0.3007049560546875, 36.155487060546875, 30.82622528076172, 38.12855529785156, 35.2130126953125, 24.338478088378906, 33.8961067199707, 43.16607666015625, 47.210968017578125, 35.48657989501953, 12.364891052246094, 12.501518249511719, -0.0222930908203125, 37.1434326171875, 22.288700103759766, 8.876174926757812, 15.565109252929688, 13.069198608398438, -5.541786193847656, 9.283012390136719, 15.22467041015625, 36.27768325805664, 13.04677963256836, 0.1859607696533203, 9.198776245117188, 24.29718780517578, 12.733779907226562, 30.23919677734375, 5.237396240234375, 17.23603057861328, 24.272872924804688, 41.26322937011719, 8.318761825561523, 1.0819625854492188, 22.690093994140625, 12.64837646484375, 32.17535400390625, 4.036806106567383, 18.413358688354492, 5.544097900390625, 27.972068786621094, 19.496864318847656, 23.03874397277832, -10.596649169921875, 24.490062713623047, 4.784614562988281, 67.79399871826172, 7.817436218261719, 40.4860954284668, 33.938575744628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000168.npy"} +{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 17.245698928833008, "std": 21.965595245361328, "min": -21.029052734375, "p10": -4.611370849609374, "median": 12.022171020507812, "p90": 44.29730834960938, "max": 98.49119567871094, "pos_frac": 0.78125, "sample": [10.632064819335938, 24.65524673461914, -2.3065452575683594, 9.421577453613281, 17.120315551757812, 40.082664489746094, 10.321853637695312, 26.037673950195312, 16.248504638671875, 0.8570632934570312, -6.131649017333984, 26.808258056640625, -21.029052734375, -0.4720649719238281, 5.534900665283203, -3.6247100830078125, 2.2386932373046875, -2.619171142578125, 11.929641723632812, 26.69013214111328, 18.891582489013672, -0.13358306884765625, 12.01531982421875, -5.500247955322266, 45.7088508605957, 31.278976440429688, 16.25306510925293, 7.029632568359375, 22.707412719726562, 4.793107986450195, 34.68598175048828, 4.8132476806640625, 3.215595245361328, 17.374404907226562, 24.171245574951172, 25.273956298828125, 43.37347412109375, 1.1361141204833984, 22.292797088623047, 49.87847900390625, 14.718595504760742, 98.49119567871094, 2.7884445190429688, -0.0462188720703125, 29.288101196289062, 9.634700775146484, 12.029022216796875, -8.220924377441406, 69.931884765625, 8.393943786621094, 79.83767700195312, -5.373542785644531, 30.254776000976562, 7.1543121337890625, 44.6932373046875, 5.5480194091796875, 22.90125274658203, -5.0342254638671875, 16.807445526123047, 16.393497467041016, -2.9679412841796875, 26.925678253173828, 67.7008056640625, -9.779815673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000169.npy"} +{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 21.571361541748047, "std": 18.903135299682617, "min": -24.69171142578125, "p10": 0.4831369400024424, "median": 19.555021286010742, "p90": 49.639342880249025, "max": 64.28924560546875, "pos_frac": 0.90625, "sample": [45.704322814941406, -0.7277145385742188, 15.38958740234375, 25.198272705078125, 33.1151237487793, 14.573596954345703, 29.53314208984375, 10.980819702148438, 27.88030242919922, -3.158721923828125, 38.7430419921875, -4.763759613037109, 21.780540466308594, -9.325645446777344, 4.510339736938477, 2.9411239624023438, 27.63367462158203, 64.28924560546875, 23.420875549316406, 51.26066589355469, 2.541248321533203, 1.4444713592529297, 41.62643814086914, 28.38015365600586, 0.071136474609375, 13.368614196777344, -24.69171142578125, 4.7211456298828125, 10.681488037109375, 42.16117477416992, 42.278900146484375, 48.447174072265625, 16.09967803955078, 27.65155029296875, 53.48914337158203, 3.0552291870117188, 53.05671691894531, 5.4099273681640625, 6.679225921630859, 55.3162841796875, 36.8823356628418, 15.745819091796875, 7.664867401123047, 50.150272369384766, 22.376136779785156, -0.066741943359375, 15.202787399291992, 7.243921279907227, 7.0033721923828125, 5.776887893676758, 25.535118103027344, 26.347412109375, 22.27349853515625, 17.32950210571289, 53.673728942871094, 10.030715942382812, 24.280685424804688, 40.93219757080078, 5.310070037841797, 32.47675323486328, 46.821563720703125, 14.728208541870117, 34.6478271484375, 7.433429718017578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000170.npy"} +{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 18.75889015197754, "std": 19.87282371520996, "min": -17.434616088867188, "p10": -0.8283037185668927, "median": 15.055505752563477, "p90": 41.9427833557129, "max": 95.2591552734375, "pos_frac": 0.890625, "sample": [3.8985366821289062, 43.82563781738281, 39.69757843017578, 6.558219909667969, 6.363124847412109, -1.608551025390625, 3.0163421630859375, 12.818046569824219, -9.972412109375, 26.396987915039062, 4.3884124755859375, 22.485698699951172, 25.830230712890625, 16.425888061523438, 16.85724639892578, 20.22882080078125, 15.183334350585938, 3.5000152587890625, 14.927677154541016, -2.9083786010742188, 35.29151153564453, 24.20395278930664, 25.40753173828125, 12.878171920776367, 56.238258361816406, 24.593799591064453, 14.673301696777344, 95.2591552734375, -17.434616088867188, 20.544105529785156, 12.92388916015625, -9.128097534179688, 45.35595703125, 30.522903442382812, 19.492198944091797, 4.787384033203125, 45.6455078125, 32.223655700683594, 6.008182525634766, 82.02218627929688, 20.282745361328125, 23.32592010498047, 35.53521728515625, 7.918107986450195, -16.160171508789062, 16.00030517578125, 34.574684143066406, 25.381622314453125, 1.0402717590332031, -2.7800216674804688, 4.150016784667969, 11.190093994140625, 34.867820739746094, 20.352399826049805, 13.69390869140625, 14.908079147338867, 12.1920166015625, 37.99012756347656, 1.1392364501953125, 14.406867980957031, 4.985294342041016, 0.9922733306884766, 42.90501403808594, 12.255744934082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000171.npy"} +{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 18.185348510742188, "std": 18.891036987304688, "min": -21.518531799316406, "p10": -4.333843994140624, "median": 16.94161891937256, "p90": 42.5125701904297, "max": 74.30046081542969, "pos_frac": 0.84375, "sample": [13.422815322875977, 24.624343872070312, -21.518531799316406, 20.718421936035156, 11.592735290527344, 2.5240402221679688, 36.51715087890625, 20.318450927734375, 15.80804443359375, 1.1821708679199219, -7.154579162597656, 12.769670486450195, -3.4474258422851562, 22.420196533203125, 26.052764892578125, 22.64690399169922, 15.593631744384766, 11.630172729492188, -6.85276985168457, 44.06202697753906, 1.7847442626953125, 30.43612289428711, 8.428390502929688, 31.05242919921875, 3.2822265625, 15.310550689697266, 33.64192199707031, 37.22713088989258, 4.766746520996094, 68.41705322265625, -2.2630538940429688, 19.522903442382812, 43.96466827392578, 13.098876953125, 35.33610534667969, 61.347747802734375, 74.30046081542969, 18.075193405151367, 22.75623321533203, 22.75902557373047, -4.713737487792969, 6.8633575439453125, 15.468399047851562, 24.603727340698242, 20.463478088378906, -3.0570831298828125, 10.101125717163086, 18.615026473999023, 5.4397125244140625, 39.50653076171875, 23.318756103515625, 2.7264862060546875, 3.10723876953125, 34.62820053100586, 23.177513122558594, -14.970367431640625, 29.87179946899414, 20.040847778320312, 50.153839111328125, 15.037017822265625, -4.795989990234375, -10.904380798339844, 43.800872802734375, 9.224258422851562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000172.npy"} +{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 18.32070541381836, "std": 17.518054962158203, "min": -13.054893493652344, "p10": -2.5041090011596676, "median": 16.15769386291504, "p90": 42.80900421142579, "max": 67.28913879394531, "pos_frac": 0.84375, "sample": [7.7194061279296875, 16.130722045898438, 18.761075973510742, 18.971473693847656, -4.26751708984375, 31.486968994140625, 5.971488952636719, 34.77268981933594, 42.22499084472656, 3.2294235229492188, 21.835205078125, -4.724822998046875, 3.399141311645508, -2.0040054321289062, 18.565963745117188, 43.142181396484375, 13.303573608398438, 23.318859100341797, 14.323568344116211, 36.436004638671875, 21.803802490234375, 6.021827697753906, 11.640602111816406, 8.933273315429688, 8.922462463378906, 16.793365478515625, 63.20866394042969, 7.872255325317383, 27.95098876953125, 21.328643798828125, 15.943061828613281, 10.791112899780273, -11.83111572265625, 30.691843032836914, 33.248443603515625, 17.311416625976562, 43.059295654296875, 46.512908935546875, -2.7184391021728516, 54.839393615722656, 35.04509353637695, 23.453842163085938, 5.724498748779297, 4.852180480957031, 8.256927490234375, 11.068721771240234, 67.28913879394531, 38.02988815307617, -9.620716094970703, 18.048919677734375, 10.133203506469727, 16.85129737854004, 14.611186981201172, -0.00079345703125, -13.054893493652344, 55.08819580078125, 12.200918197631836, -0.7836456298828125, 16.18466567993164, 17.880739212036133, 28.732940673828125, 10.544891357421875, 29.950672149658203, -2.882904052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000173.npy"} +{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 18.424602508544922, "std": 18.221277236938477, "min": -17.52099609375, "p10": -3.158850860595703, "median": 16.053077697753906, "p90": 39.663675689697264, "max": 73.0679931640625, "pos_frac": 0.859375, "sample": [4.596397399902344, 13.849365234375, -4.508872985839844, 0.9328765869140625, 13.496368408203125, 11.664237976074219, 6.450572967529297, 7.421590805053711, 20.858963012695312, 37.37318420410156, 38.44920349121094, 2.9177398681640625, 23.890520095825195, 5.236379623413086, 26.437360763549805, 21.826210021972656, 15.123291015625, 18.217453002929688, 23.050804138183594, 49.67649841308594, 10.02349853515625, 14.679580688476562, 10.335243225097656, 1.996419906616211, 33.90362548828125, -16.3951416015625, 26.70465087890625, 40.43479919433594, 5.929100036621094, 11.837076187133789, 27.4421329498291, -3.561260223388672, 15.936111450195312, -5.037330627441406, 28.53509521484375, -0.8816986083984375, 18.91925048828125, 22.925479888916016, -11.898727416992188, 52.73512268066406, 55.2667236328125, 73.0679931640625, 5.6669921875, 16.1700439453125, -2.825094223022461, -17.52099609375, -3.301889419555664, 21.089340209960938, 17.598953247070312, 20.29584503173828, 39.610382080078125, 15.905645370483398, 14.161163330078125, 34.825904846191406, 26.489219665527344, 19.963577270507812, 39.68651580810547, 30.367027282714844, 4.2506256103515625, 2.994649887084961, 65.84170532226562, 29.426183700561523, 34.732601165771484, 13.88819694519043], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000174.npy"} +{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 23.7386531829834, "std": 19.637550354003906, "min": -6.104118347167969, "p10": 3.8449869155883794, "median": 17.19618797302246, "p90": 55.349117279052734, "max": 72.57981872558594, "pos_frac": 0.984375, "sample": [8.291868209838867, 10.86480712890625, 18.50840187072754, 3.598358154296875, 34.239356994628906, 16.402587890625, 38.96089172363281, 15.470619201660156, 6.138936996459961, 62.19548034667969, 65.05230712890625, 3.2247276306152344, 5.411712646484375, 66.16265869140625, 16.79953384399414, -6.104118347167969, 33.78439712524414, 28.365768432617188, 17.59284210205078, 48.228477478027344, 27.626319885253906, 7.192466735839844, 26.59062957763672, 72.57981872558594, 2.4419174194335938, 8.472450256347656, 57.38981628417969, 22.09851837158203, 24.202163696289062, 10.121997833251953, 3.091888427734375, 6.206432342529297, 55.26043701171875, 15.723342895507812, 13.628128051757812, 10.16366958618164, 3.164409637451172, 46.78514099121094, 4.420454025268555, 8.680862426757812, 42.596004486083984, 25.417991638183594, 30.425735473632812, 49.887542724609375, 0.9734420776367188, 61.120033264160156, 34.56294250488281, 30.500587463378906, 11.640628814697266, 47.83824157714844, 45.87286376953125, 13.624296188354492, 21.696929931640625, 14.648475646972656, 9.154142379760742, 7.698123931884766, 24.400039672851562, 11.068458557128906, 27.754135131835938, 55.387123107910156, 19.311767578125, 5.250801086425781, 4.9183197021484375, 4.4947052001953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000175.npy"} +{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 21.942890167236328, "std": 17.43705940246582, "min": -10.739959716796875, "p10": 0.44505233764648505, "median": 20.91852569580078, "p90": 45.50640487670899, "max": 64.22175598144531, "pos_frac": 0.90625, "sample": [27.73638916015625, 11.313148498535156, 12.380325317382812, 36.35987854003906, 28.140914916992188, 22.850868225097656, 2.4967575073242188, 55.446380615234375, -2.8977813720703125, 13.018417358398438, 7.4761505126953125, 62.47210693359375, 22.717966079711914, 2.4007644653320312, 48.769989013671875, 26.077651977539062, 11.37054443359375, 58.95983123779297, 23.770828247070312, 2.2878494262695312, -2.2921829223632812, 16.03083038330078, 23.203453063964844, -3.3052406311035156, 46.030235290527344, -1.2710456848144531, 15.28253173828125, 29.05603790283203, 29.774932861328125, 5.043006896972656, 23.808547973632812, 15.541458129882812, 13.102066040039062, -10.739959716796875, 21.862831115722656, 28.88941192626953, 18.869369506835938, 10.247756958007812, 16.81890106201172, 24.403545379638672, 35.598838806152344, 1.6350593566894531, 4.9805145263671875, 1.1263389587402344, 22.55499267578125, 25.411842346191406, 33.134986877441406, 19.974220275878906, 36.39442443847656, 55.357879638671875, 28.071863174438477, 11.203609466552734, 18.52147674560547, -1.9733238220214844, 39.11494064331055, 38.70635986328125, 0.15307235717773438, 19.647491455078125, 44.209007263183594, 11.741222381591797, 44.28413391113281, 64.22175598144531, 19.86626625061035, 36.90245056152344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000176.npy"} +{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 21.963430404663086, "std": 20.603771209716797, "min": -6.3747100830078125, "p10": -1.341127014160156, "median": 17.294994354248047, "p90": 47.3712158203125, "max": 85.78280639648438, "pos_frac": 0.875, "sample": [19.265960693359375, 13.2525634765625, 26.134231567382812, 16.18670654296875, 15.919815063476562, 32.41204071044922, 5.9185028076171875, 15.479080200195312, 37.56316375732422, 36.683135986328125, 47.61354064941406, 0.2812995910644531, 23.906291961669922, 40.058677673339844, 13.886186599731445, 18.646053314208984, 11.818588256835938, 82.59356689453125, 0.8457107543945312, 44.64556884765625, 12.48574447631836, 29.378299713134766, -6.3747100830078125, 46.96575927734375, 0.11892509460449219, 13.908292770385742, 66.14717102050781, -3.5936203002929688, -1.0844268798828125, 28.499000549316406, 17.470808029174805, 36.88972473144531, 56.318397521972656, 2.0173416137695312, 18.673233032226562, 6.589202880859375, 13.368690490722656, 58.387996673583984, 0.19179344177246094, -1.9685897827148438, -3.861663818359375, 25.320663452148438, 5.190893173217773, 15.707168579101562, 11.922027587890625, 27.87067413330078, 46.90630340576172, -1.6979713439941406, 8.134510040283203, -1.451141357421875, 28.86846923828125, 21.991960525512695, 36.15505599975586, -5.854579925537109, 17.81005859375, 9.141883850097656, 17.11918067932129, 20.065874099731445, 47.54498291015625, 6.6000518798828125, 85.78280639648438, 45.30438232421875, 32.958396911621094, 10.62982177734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000177.npy"} +{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 24.486705780029297, "std": 22.665319442749023, "min": -12.7283935546875, "p10": -0.9489757537841789, "median": 18.361080169677734, "p90": 56.17904701232911, "max": 89.21905517578125, "pos_frac": 0.875, "sample": [32.9039421081543, 0.10095977783203125, 41.98322296142578, -8.508956909179688, 41.67784881591797, 15.476425170898438, 28.30542755126953, 8.297168731689453, 3.6275787353515625, 39.12052917480469, 14.437957763671875, 40.383750915527344, 44.29994201660156, 19.129173278808594, 17.12371826171875, 43.60455322265625, 76.81564331054688, 17.592987060546875, 89.21905517578125, -2.6146697998046875, 68.99333190917969, 17.489286422729492, 14.795440673828125, 42.80111312866211, 28.022069931030273, 55.35795211791992, 41.2230224609375, -7.267189025878906, 19.352981567382812, 10.455131530761719, -2.5479202270507812, 21.50543212890625, 16.33753204345703, 1.7980232238769531, 12.886154174804688, 57.71000671386719, 11.8974609375, -1.3058700561523438, 11.672447204589844, 30.325897216796875, 12.999256134033203, 17.55438232421875, -12.7283935546875, 72.19596862792969, -0.11622238159179688, 39.28850555419922, 23.97079086303711, 25.42394256591797, 2.085611343383789, 56.53094482421875, 14.746683120727539, 43.128700256347656, 22.007293701171875, 2.819551467895508, 27.9921875, 8.759834289550781, 30.387725830078125, 7.56829833984375, 10.61920166015625, 48.089752197265625, 63.4481201171875, -11.259284973144531, 44.207237243652344, 2.950580596923828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000178.npy"} +{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 24.460594177246094, "std": 23.770587921142578, "min": -28.762054443359375, "p10": -3.7336040496826164, "median": 24.872196197509766, "p90": 54.65450744628907, "max": 82.0667724609375, "pos_frac": 0.828125, "sample": [24.980796813964844, 45.2735710144043, 45.27772521972656, 62.172943115234375, 17.41313362121582, -11.186370849609375, 52.13356018066406, 28.79354476928711, 0.7701568603515625, -3.0914878845214844, 24.763595581054688, 19.304765701293945, 6.288047790527344, 15.840415954589844, 4.2908935546875, 43.35986328125, 0.7562408447265625, 49.750274658203125, 26.711711883544922, 1.342132568359375, 29.119165420532227, -1.8783416748046875, 54.862762451171875, 10.164447784423828, 56.01775360107422, 37.45043182373047, 30.71307373046875, 65.58625793457031, 49.88276672363281, 30.463050842285156, 37.672569274902344, 82.0667724609375, 69.36885070800781, 11.506065368652344, 31.162755966186523, 23.096649169921875, -1.458160400390625, 20.29128646850586, 1.6286468505859375, 32.42826843261719, 38.276512145996094, -7.44526481628418, 21.095335006713867, 8.048477172851562, 52.286651611328125, 34.231109619140625, -9.630699157714844, 14.264266967773438, 34.540958404541016, -0.45766448974609375, 8.7738037109375, 36.69770812988281, 20.2838134765625, 48.1397705078125, 54.1685791015625, 48.50909423828125, -28.762054443359375, 55.48393249511719, -7.866310119628906, 4.505302429199219, -22.192642211914062, 26.020263671875, 15.425390243530273, -4.008796691894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000179.npy"} +{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 24.595914840698242, "std": 19.27443504333496, "min": -8.049385070800781, "p10": -0.45963649749755786, "median": 21.61882495880127, "p90": 52.32546653747559, "max": 74.037841796875, "pos_frac": 0.890625, "sample": [25.461633682250977, 42.923553466796875, 22.161575317382812, -5.99053955078125, -5.486572265625, 56.16070556640625, 6.658138275146484, 14.08074951171875, 43.960960388183594, 28.247039794921875, -0.7602252960205078, 33.96430206298828, 19.50726318359375, 33.21124267578125, 23.52167510986328, 9.273193359375, 40.51255798339844, 27.796916961669922, 34.204124450683594, 14.377822875976562, 31.32811737060547, 0.24173736572265625, 12.480945587158203, 50.623069763183594, 19.56110382080078, 60.13883972167969, 27.21289825439453, 26.523391723632812, 7.719562530517578, 15.44571304321289, 45.03753662109375, 5.692998886108398, 39.59788513183594, 23.37291717529297, 49.167327880859375, -1.622976303100586, 20.26618194580078, 42.48712921142578, 6.73309326171875, 53.28926086425781, 21.076074600219727, 2.7561111450195312, 57.546836853027344, 11.371147155761719, -5.1487884521484375, 12.754981994628906, 15.837875366210938, 28.060226440429688, 42.66755676269531, 15.072196960449219, 18.673297882080078, 53.0550651550293, 6.947175979614258, 74.037841796875, 18.060134887695312, -4.019065856933594, 37.876441955566406, 19.855758666992188, 11.083351135253906, -8.049385070800781, 68.06851196289062, 33.559112548828125, 12.742515563964844, 31.170761108398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000180.npy"} +{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 17.567216873168945, "std": 18.16219711303711, "min": -14.637115478515625, "p10": -2.7568309783935545, "median": 14.257137298583984, "p90": 44.96337890625001, "max": 74.3797607421875, "pos_frac": 0.859375, "sample": [4.174339294433594, 7.347339630126953, 12.076364517211914, 30.389148712158203, 45.79280090332031, -2.7779178619384766, -2.4420242309570312, 16.960800170898438, 14.183830261230469, 13.7373046875, 11.937076568603516, 10.882892608642578, 1.373666763305664, 0.08065032958984375, -14.637115478515625, 35.954734802246094, 17.851863861083984, 13.651657104492188, 27.26988983154297, 8.90414047241211, 23.853073120117188, -10.75564956665039, 2.56475830078125, 43.02806091308594, 3.1007041931152344, 16.102256774902344, 16.42938232421875, 74.3797607421875, 28.789390563964844, 7.039134979248047, 33.323081970214844, 11.947822570800781, -7.737335205078125, 16.314346313476562, 16.876052856445312, 8.325754165649414, 6.689992904663086, 14.256416320800781, 47.65711975097656, -6.236114501953125, 10.028985977172852, -5.180633544921875, 51.28364562988281, 15.567119598388672, 39.00682067871094, 28.416486740112305, -7.9048004150390625, 29.03179359436035, 14.257858276367188, 21.884241104125977, 49.97068786621094, 62.500213623046875, 24.020469665527344, 21.350574493408203, 19.893234252929688, -2.7076282501220703, 0.9549045562744141, 13.145050048828125, 57.088653564453125, 29.98491668701172, 17.017494201660156, 16.329177856445312, 7.2037811279296875, 12.499359130859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000181.npy"} +{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 26.983417510986328, "std": 23.57621955871582, "min": -34.21168518066406, "p10": 0.35819778442382855, "median": 24.671287536621094, "p90": 58.815073013305664, "max": 71.30987548828125, "pos_frac": 0.90625, "sample": [57.0606689453125, 28.4024658203125, 59.679771423339844, 32.978607177734375, 24.471786499023438, 24.87078857421875, 43.919921875, 42.735321044921875, 65.32948303222656, 35.96091842651367, 17.7894287109375, 19.27100372314453, 7.875997543334961, 0.8530521392822266, 55.09123992919922, -34.21168518066406, 63.931053161621094, 1.5351486206054688, 4.2689971923828125, 15.080787658691406, 60.260467529296875, 1.9369773864746094, 70.35517120361328, 28.206878662109375, 15.726303100585938, -5.031711578369141, 56.772186279296875, 9.185298919677734, 10.237979888916016, 25.162261962890625, 48.068641662597656, 57.786285400390625, 17.802696228027344, -10.749404907226562, 32.557762145996094, 45.17223358154297, 27.356674194335938, 13.521255493164062, 41.150054931640625, 44.35630798339844, 0.17905426025390625, 15.502042770385742, 11.133344650268555, 44.188358306884766, 24.306991577148438, 33.562049865722656, 17.00958251953125, 58.89748764038086, -0.5267791748046875, 0.7761993408203125, 71.30987548828125, 45.84050750732422, 22.456069946289062, 4.7701873779296875, -7.7284088134765625, 6.747077941894531, -13.680168151855469, 4.745479583740234, 52.144195556640625, 23.5931396484375, 58.622772216796875, 14.353336334228516, 35.55742645263672, 46.44984436035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000182.npy"} +{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 20.88549041748047, "std": 18.435461044311523, "min": -2.7816314697265625, "p10": 1.0703777313232423, "median": 15.518823623657227, "p90": 50.52491302490235, "max": 70.30805969238281, "pos_frac": 0.953125, "sample": [39.68345642089844, 31.58416748046875, 9.750997543334961, 14.518251419067383, 1.3986778259277344, 54.8327751159668, 32.69456481933594, 38.572052001953125, 30.502418518066406, 6.861167907714844, 8.901138305664062, 24.244285583496094, 2.1755714416503906, -0.2162914276123047, 1.0068206787109375, 28.01152801513672, 70.30805969238281, 57.064598083496094, 32.572425842285156, 14.990888595581055, 22.26276397705078, 15.60653305053711, 54.889923095703125, 33.36687088012695, 15.431114196777344, 1.2186775207519531, 26.15139389038086, 7.133186340332031, 7.0652923583984375, 0.4468841552734375, 31.09032440185547, 20.514249801635742, 1.7441120147705078, 0.9214382171630859, 32.597190856933594, 1.9866943359375, 20.732120513916016, 36.40174865722656, 50.26177215576172, 58.19231414794922, 5.2617340087890625, 11.469402313232422, 61.77581787109375, 0.91290283203125, 5.986717224121094, -2.7816314697265625, 32.25328826904297, -1.7386474609375, 7.814666748046875, 2.627899169921875, 50.63768768310547, 19.77227783203125, 35.201812744140625, 31.98503875732422, 10.162113189697266, 13.427215576171875, 4.091102600097656, 39.27661895751953, 10.521026611328125, 21.65252685546875, 1.9625320434570312, 17.49361801147461, 6.573760986328125, 12.859687805175781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000183.npy"} +{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 15.659937858581543, "std": 17.907655715942383, "min": -10.619544982910156, "p10": -1.9542850494384763, "median": 10.777595520019531, "p90": 35.1061264038086, "max": 84.0571517944336, "pos_frac": 0.84375, "sample": [6.938751220703125, -1.6135482788085938, 14.960426330566406, 25.663650512695312, 21.940994262695312, 17.928543090820312, -0.8386917114257812, 6.24786376953125, 25.20079803466797, -2.1003150939941406, 84.0571517944336, 47.350982666015625, 0.8329944610595703, 20.7479248046875, 13.010673522949219, 10.44158935546875, 14.437652587890625, 21.082176208496094, 22.189189910888672, 50.1439208984375, 1.4724006652832031, 31.2625732421875, -1.521728515625, 2.7786788940429688, 5.414054870605469, 26.333755493164062, 9.771066665649414, 4.210868835449219, 25.708282470703125, 7.008951187133789, 25.740264892578125, 2.9896240234375, 38.3704833984375, 35.74971008300781, -7.892917633056641, 45.79204559326172, 5.635993957519531, -10.619544982910156, 7.887119293212891, 20.09109878540039, 33.60443115234375, 0.3410186767578125, 6.014720916748047, 11.13212776184082, 22.799209594726562, 31.687911987304688, 4.023942947387695, 8.292560577392578, 7.3383331298828125, -4.859230041503906, -4.417804718017578, 24.414751052856445, -2.8533554077148438, 15.522968292236328, 4.196891784667969, 8.90704345703125, 10.538871765136719, 30.073272705078125, 8.825199127197266, 11.016319274902344, 12.545501708984375, -5.5715789794921875, 18.16794204711914, 75.689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000184.npy"} +{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 15.436178207397461, "std": 19.218881607055664, "min": -24.6705322265625, "p10": -2.5286979675292964, "median": 11.274328231811523, "p90": 44.805409622192386, "max": 81.427001953125, "pos_frac": 0.75, "sample": [-24.6705322265625, -2.751720428466797, 0.5404758453369141, 31.761871337890625, 20.956737518310547, 7.439027786254883, -3.1864013671875, 14.20492172241211, 32.74761962890625, 16.450122833251953, 7.415424346923828, 31.99897003173828, 21.63542938232422, 5.17304801940918, 26.300216674804688, 24.240203857421875, -1.6454544067382812, 15.563629150390625, 7.8212738037109375, 3.98663330078125, -0.7111663818359375, 22.13062858581543, 0.6834564208984375, 24.966327667236328, 7.714344024658203, 20.974037170410156, -1.0926055908203125, 24.264205932617188, 5.401203155517578, 18.414566040039062, 33.601806640625, -2.9028282165527344, 44.29693603515625, 45.64978790283203, -7.620025634765625, 29.108306884765625, -1.6529693603515625, 35.30109405517578, 49.498802185058594, 4.201068878173828, -1.3081932067871094, 14.221899032592773, -0.8075141906738281, -5.5158843994140625, -0.7381229400634766, 4.990930557250977, 25.323646545410156, 53.72130584716797, 45.0233268737793, 11.339836120605469, 81.427001953125, 5.135768890380859, 22.30316925048828, 8.938091278076172, -0.32696533203125, 51.4374885559082, 11.208820343017578, 25.94947052001953, -22.399139404296875, 47.140380859375, 1.7903060913085938, 6.586875915527344, 16.272720336914062, -2.008312225341797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000185.npy"} +{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 25.340028762817383, "std": 18.850744247436523, "min": -9.084362030029297, "p10": 4.139540100097657, "median": 22.58097743988037, "p90": 54.50668716430665, "max": 76.47590637207031, "pos_frac": 0.953125, "sample": [4.29241943359375, 22.38452911376953, 56.68763732910156, 34.19988250732422, 27.16425323486328, 11.549423217773438, 39.59001159667969, -0.5074577331542969, 62.44767761230469, 42.29914093017578, 17.676025390625, 43.297027587890625, 4.0740203857421875, 11.7213134765625, 25.703121185302734, 5.687145233154297, 22.77742576599121, 5.317268371582031, 13.637947082519531, 8.01446533203125, 33.2376708984375, 9.952442169189453, 17.56133270263672, 36.295570373535156, 15.016250610351562, 26.865917205810547, 33.73855972290039, 55.297691345214844, 52.6610107421875, 14.845840454101562, 50.15907287597656, 13.505277633666992, 30.645164489746094, 11.544458389282227, 12.190238952636719, 30.407611846923828, 63.10394287109375, 65.03268432617188, 37.01791763305664, 2.2515830993652344, 29.9627685546875, 0.6248798370361328, 35.89289093017578, 16.458999633789062, 18.690956115722656, 76.47590637207031, 0.8238677978515625, 36.369110107421875, -2.6929168701171875, 7.454048156738281, 46.00091552734375, 57.24329376220703, 23.782875061035156, 36.26726150512695, 17.979843139648438, 38.12207794189453, 11.606376647949219, -9.084362030029297, 10.041679382324219, 27.00971221923828, 21.045578002929688, 24.586044311523438, 12.032737731933594, 15.7237548828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000186.npy"} +{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 18.950557708740234, "std": 20.59282684326172, "min": -27.73986053466797, "p10": -1.6332426071166992, "median": 17.493057250976562, "p90": 49.44420089721681, "max": 68.41879272460938, "pos_frac": 0.828125, "sample": [-27.73986053466797, 67.32589721679688, -0.1396160125732422, 14.783985137939453, -15.038990020751953, 20.065383911132812, 34.39283752441406, 30.04083251953125, 15.59249496459961, 23.55426788330078, 16.430255889892578, 11.924028396606445, 46.54710388183594, 12.828498840332031, 17.313034057617188, 26.375640869140625, 44.859745025634766, 4.3229827880859375, 0.42734527587890625, -1.4712104797363281, 21.511611938476562, -1.6347503662109375, 68.41879272460938, 26.003089904785156, 20.804397583007812, 22.41855239868164, 65.95095825195312, -3.1816673278808594, 35.79697799682617, 3.104337692260742, -2.8512115478515625, 31.538795471191406, 2.6294403076171875, -1.6297245025634766, 1.0314979553222656, 50.685813903808594, 11.474906921386719, 21.821569442749023, -1.5290069580078125, 29.51360321044922, 41.69834899902344, 58.47705078125, 17.673080444335938, 4.842399597167969, 30.49615478515625, 8.116914749145508, 13.288829803466797, 5.141632080078125, -21.45349884033203, 24.287986755371094, 3.2402267456054688, 7.6038818359375, 21.989501953125, 17.767478942871094, 30.372133255004883, 13.177627563476562, 31.364837646484375, 2.31634521484375, 17.847122192382812, 60.38473892211914, 51.34759521484375, 19.817598342895508, 12.058137893676758, -3.2930450439453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000187.npy"} +{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 21.670263290405273, "std": 20.467336654663086, "min": -29.131370544433594, "p10": -1.0429559707641598, "median": 16.743595123291016, "p90": 52.89972839355471, "max": 64.52778625488281, "pos_frac": 0.859375, "sample": [36.25178146362305, 17.04737091064453, 55.416656494140625, 1.6664924621582031, 40.46782684326172, 7.9229736328125, 3.0340042114257812, -7.328971862792969, 35.332000732421875, 42.030357360839844, 12.999122619628906, -1.3772964477539062, 27.215728759765625, 25.378738403320312, 47.54998779296875, 34.81001281738281, 16.393592834472656, 13.385566711425781, 14.660400390625, -0.5819988250732422, 5.0603485107421875, 2.7235183715820312, 23.945144653320312, 16.39318084716797, 5.551780700683594, 19.91503143310547, 11.690635681152344, 9.102275848388672, 64.09249114990234, 6.499786376953125, 62.348365783691406, -29.131370544433594, 42.921539306640625, 32.136146545410156, 64.52778625488281, 18.032424926757812, 12.12554931640625, 55.192474365234375, 3.5313758850097656, 46.12004852294922, 31.129806518554688, 13.024608612060547, -8.266983032226562, 16.077646255493164, 41.735923767089844, 12.675262451171875, 44.63443374633789, -3.7311058044433594, 25.233295440673828, 58.19097137451172, -1.240509033203125, 31.183231353759766, 19.355228424072266, -4.044225692749023, 33.11729049682617, 16.4398193359375, 23.22701072692871, 61.55162048339844, 39.16425704956055, 10.105997085571289, 20.200042724609375, -0.07828330993652344, 10.343132019042969, 1.8155059814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000188.npy"} +{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 22.340126037597656, "std": 19.784210205078125, "min": -26.667800903320312, "p10": 2.3460739135742195, "median": 18.112021446228027, "p90": 52.62342262268067, "max": 70.375, "pos_frac": 0.921875, "sample": [10.801513671875, 44.125492095947266, -0.165618896484375, 61.883819580078125, 9.634040832519531, 2.024667739868164, 14.143159866333008, -2.9840240478515625, 3.0960216522216797, 31.892555236816406, 32.63309860229492, 60.847389221191406, 38.617340087890625, 10.462310791015625, 48.31847381591797, 20.029876708984375, 18.23017120361328, 7.566045761108398, 17.145065307617188, 10.791519165039062, -1.6565570831298828, 34.871604919433594, 19.807098388671875, 42.93312072753906, 25.334144592285156, 9.904052734375, 3.598785400390625, 18.440166473388672, 8.720233917236328, 45.66157531738281, 13.893913269042969, 26.358007431030273, 16.538536071777344, 54.23835754394531, 4.47552490234375, 17.993871688842773, 1.3108654022216797, 69.10540771484375, 24.85298728942871, 17.07331085205078, 3.915496826171875, 27.30536651611328, 20.620445251464844, 13.535224914550781, 7.189943313598633, 58.579078674316406, 44.758056640625, 70.375, 14.852470397949219, 53.16451644897461, 51.360870361328125, 4.357423782348633, 14.795921325683594, 35.904388427734375, 7.766851425170898, 10.967994689941406, 30.930023193359375, 20.425716400146484, 22.152393341064453, -26.667800903320312, 4.105255126953125, 27.2530517578125, -2.3018646240234375, 21.874267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000189.npy"} +{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 23.570049285888672, "std": 21.541614532470703, "min": -9.24749755859375, "p10": 0.8835657119750978, "median": 19.14565658569336, "p90": 53.372885131835936, "max": 102.48904418945312, "pos_frac": 0.921875, "sample": [0.8365631103515625, 35.126625061035156, 14.517959594726562, 6.2117156982421875, 22.260818481445312, 8.925315856933594, 48.167449951171875, 19.38349151611328, 2.6263809204101562, 53.66143798828125, 28.938720703125, 6.7012939453125, 15.255653381347656, 11.961273193359375, 68.85089111328125, 21.57086181640625, 16.622726440429688, 14.05804443359375, 41.447532653808594, 19.804824829101562, -0.6981372833251953, 48.739715576171875, -3.813323974609375, 31.64897918701172, 30.0045166015625, 20.402015686035156, 9.062164306640625, 53.21424865722656, 60.54124450683594, 33.326904296875, 15.277786254882812, 39.86357116699219, 1.259979248046875, 3.032194137573242, 19.841590881347656, 31.191368103027344, 23.50170135498047, 7.6292877197265625, -8.42279052734375, 14.256515502929688, 53.44087219238281, 0.5574951171875, 22.504737854003906, 18.61090087890625, 1.4901885986328125, 3.2472076416015625, 31.066638946533203, 51.206451416015625, -2.6351318359375, 27.954933166503906, -9.24749755859375, 66.70027160644531, 12.408981323242188, 13.250104904174805, 24.380844116210938, 28.319305419921875, 11.521141052246094, 102.48904418945312, 0.9932384490966797, 52.3216552734375, 18.907821655273438, 58.45356750488281, 18.291805267333984, 15.459503173828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000190.npy"} +{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 27.898746490478516, "std": 24.585590362548828, "min": -12.43438720703125, "p10": 1.048148345947266, "median": 23.95552635192871, "p90": 58.53111572265625, "max": 112.023193359375, "pos_frac": 0.921875, "sample": [57.9144287109375, 31.216140747070312, 64.53398132324219, 6.928443908691406, 49.0718994140625, 29.679550170898438, 33.52764892578125, 42.14872741699219, -1.1118812561035156, 47.111907958984375, 34.025909423828125, 35.71228790283203, 24.06378173828125, 37.81303405761719, 48.54473876953125, 12.051582336425781, 4.321434020996094, 5.330810546875, 35.05253601074219, 42.857276916503906, 20.793380737304688, 17.56182861328125, 18.31317138671875, 70.1051025390625, 27.067480087280273, 17.49591064453125, -1.3336410522460938, 90.13412475585938, 10.668792724609375, 13.896621704101562, 31.25762939453125, -12.269710540771484, 9.943649291992188, 4.469455718994141, 14.886627197265625, 25.17498779296875, 30.075592041015625, 23.725379943847656, 86.42060852050781, 0.8797531127929688, -0.9659976959228516, 31.79644775390625, 35.767127990722656, -12.43438720703125, 7.7528533935546875, 112.023193359375, 17.481252670288086, 7.697601318359375, 14.093185424804688, 58.79541015625, 18.539474487304688, 42.20384216308594, 23.847270965576172, 0.8084030151367188, 38.92437744140625, 22.52556610107422, 13.361030578613281, 25.360004425048828, 1.441070556640625, 13.913002014160156, 87.19013977050781, 19.08441162109375, 36.21559143066406, 30.03797149658203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000191.npy"} +{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 23.677879333496094, "std": 22.121126174926758, "min": -8.428630828857422, "p10": -1.9902786254882812, "median": 22.301986694335938, "p90": 52.34594879150391, "max": 94.81887817382812, "pos_frac": 0.828125, "sample": [14.004096984863281, -1.1980667114257812, 29.795629501342773, 31.890403747558594, 51.310401916503906, 26.905357360839844, 0.02851104736328125, -5.627481460571289, 10.624282836914062, -2.68585205078125, 5.9726409912109375, 22.45348358154297, -3.7156600952148438, 27.740676879882812, 18.886886596679688, -1.8948211669921875, -6.794647216796875, 16.107332229614258, -1.588287353515625, 19.63885498046875, -1.7715377807617188, 65.57069396972656, -8.428630828857422, 27.242576599121094, 6.713653564453125, 66.67692565917969, 50.11827087402344, 94.81887817382812, 5.422374725341797, 67.87518310546875, 18.160995483398438, 23.584976196289062, 51.37969970703125, 39.56972122192383, 31.119644165039062, 8.266481399536133, 44.11170959472656, 52.76005554199219, 30.25389862060547, 3.457050323486328, 34.90791320800781, 71.38328552246094, 12.681306838989258, 1.4534835815429688, 31.145862579345703, 37.818199157714844, 22.150489807128906, 17.813682556152344, 16.60205078125, 22.49596405029297, 25.147048950195312, -2.03118896484375, 28.488998413085938, 8.326202392578125, 8.441192626953125, 45.885337829589844, 23.751869201660156, 18.28168487548828, -6.629241943359375, 44.09382629394531, 30.221031188964844, 25.160104751586914, 15.771835327148438, 53.267059326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000192.npy"} +{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 16.950923919677734, "std": 18.348241806030273, "min": -45.65321350097656, "p10": -1.4036169052124023, "median": 17.462520599365234, "p90": 39.14150085449219, "max": 59.43144226074219, "pos_frac": 0.875, "sample": [15.327682495117188, -1.4447498321533203, 19.0802001953125, 26.32299041748047, -45.65321350097656, 21.69405174255371, 36.463104248046875, 25.170536041259766, -32.21083068847656, 59.43144226074219, 36.14467239379883, -5.931684494018555, 20.110816955566406, 15.98958969116211, 35.777503967285156, 2.1540184020996094, 7.749029159545898, 20.71539306640625, 0.33016204833984375, 21.129486083984375, 1.6487293243408203, 10.10833740234375, 17.357364654541016, 16.563316345214844, 7.062726974487305, 21.17818832397461, 4.849742889404297, 16.419845581054688, 4.883140563964844, 12.575820922851562, -7.118595123291016, 57.46485900878906, 22.632568359375, 8.585289001464844, 27.280349731445312, -5.1087799072265625, 19.340232849121094, 2.5430526733398438, 3.4599552154541016, 25.23576545715332, 4.043418884277344, 26.18932342529297, 16.06975555419922, 18.208412170410156, 20.655609130859375, -13.624635696411133, 48.707305908203125, 45.624855041503906, 18.99997901916504, 14.919916152954102, -1.3076400756835938, 37.149391174316406, 17.567676544189453, 6.772880554199219, 34.75044250488281, 31.17071533203125, 41.571746826171875, 39.995262145996094, 10.206043243408203, 15.090827941894531, 44.433616638183594, 11.547950744628906, 28.856414794921875, 21.947731018066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000193.npy"} +{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 22.667644500732422, "std": 19.858245849609375, "min": -5.2749786376953125, "p10": 0.7322265625000002, "median": 19.48180103302002, "p90": 51.221525573730474, "max": 81.15718078613281, "pos_frac": 0.90625, "sample": [33.71698760986328, -3.3110885620117188, 14.032630920410156, 25.891510009765625, 5.4414825439453125, 52.027313232421875, -2.1826515197753906, 49.34135437011719, 6.7600555419921875, -1.4862442016601562, 13.06496810913086, 56.5831298828125, -5.2749786376953125, 24.091049194335938, 21.73389434814453, 6.963539123535156, 20.157249450683594, 6.46636962890625, 20.82583999633789, 35.50543212890625, 1.1125259399414062, 15.972806930541992, 4.181785583496094, -0.1754150390625, 42.020530700683594, 8.244071960449219, 3.061429977416992, 81.15718078613281, 33.090850830078125, 40.19343566894531, 62.912567138671875, 45.030853271484375, 44.69084548950195, 21.14678192138672, 9.641578674316406, 60.43396759033203, 23.975439071655273, 5.321466445922852, 28.933349609375, 3.6508750915527344, 54.8017578125, 6.866851806640625, 0.6400432586669922, 8.515602111816406, 29.364765167236328, 41.87590789794922, 8.59042739868164, 2.120697021484375, 18.806352615356445, 9.379440307617188, 31.154922485351562, 8.449859619140625, 15.99386215209961, 53.532501220703125, 0.9473209381103516, 46.65760040283203, 25.84619903564453, 43.55067443847656, 36.83403015136719, 11.964433670043945, 42.031883239746094, 14.548774719238281, 23.533653259277344, -0.223175048828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000194.npy"} +{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 22.482343673706055, "std": 18.670944213867188, "min": -13.693412780761719, "p10": 0.9351194381713877, "median": 21.258570671081543, "p90": 44.19702987670899, "max": 95.12062072753906, "pos_frac": 0.921875, "sample": [34.55511474609375, 20.365554809570312, 41.273773193359375, 35.67079162597656, 15.574165344238281, 47.51354217529297, 9.352485656738281, 29.511276245117188, 32.08270263671875, 0.15563392639160156, 0.5516891479492188, 14.884536743164062, 14.993110656738281, -12.385101318359375, 9.44024658203125, 23.789310455322266, 28.112422943115234, 23.559173583984375, -13.693412780761719, 95.12062072753906, 17.595279693603516, 21.94972801208496, 7.372936248779297, 4.092742919921875, 53.21023941040039, 21.079940795898438, 33.88702392578125, 44.82575988769531, 23.024127960205078, 52.78059387207031, 29.691261291503906, -2.485626220703125, 39.615936279296875, 29.887840270996094, 20.1328125, 30.883560180664062, 1.8297901153564453, 15.727363586425781, 24.300003051757812, 29.736099243164062, -7.471626281738281, 39.857177734375, 9.514423370361328, 13.660636901855469, 73.01622009277344, 43.13648223876953, 13.618160247802734, 15.16131591796875, 13.908233642578125, 25.301115036010742, 18.156890869140625, 14.918357849121094, 44.65155029296875, 26.13623046875, 5.128501892089844, -0.893646240234375, 22.228864669799805, 17.36089324951172, 25.6242733001709, 21.43720054626465, 2.436077117919922, 17.708709716796875, 28.586181640625, 6.122734069824219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000195.npy"} +{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 26.918298721313477, "std": 22.219341278076172, "min": -14.008438110351562, "p10": 3.423771667480469, "median": 21.631463050842285, "p90": 58.92893676757813, "max": 82.51739501953125, "pos_frac": 0.953125, "sample": [58.72135925292969, 63.74715805053711, 40.297760009765625, 44.051361083984375, 5.1635894775390625, 46.28678894042969, 46.47908020019531, 43.96343994140625, 50.36277770996094, 26.70856475830078, 8.324943542480469, 25.87050437927246, 36.29210662841797, 55.231834411621094, 19.922927856445312, 8.119718551635742, 10.463323593139648, 12.695236206054688, 4.456108093261719, 46.37708282470703, 73.79643249511719, 1.202688217163086, 12.531356811523438, 12.749465942382812, 15.147624969482422, 16.634201049804688, 33.73887634277344, 45.725486755371094, 15.460853576660156, 5.031742095947266, 59.01789855957031, 10.093429565429688, 72.24832153320312, 56.32044982910156, -6.853271484375, 23.305831909179688, 32.75664138793945, 19.137977600097656, 3.3603668212890625, 17.340599060058594, 20.60748863220215, 28.035160064697266, 82.51739501953125, 0.6563587188720703, 25.210487365722656, 10.94461441040039, 10.532997131347656, 17.095855712890625, 7.128292083740234, 3.57171630859375, -14.008438110351562, 15.673606872558594, 31.83111572265625, 19.883220672607422, 68.37739562988281, -4.817718505859375, 2.051727294921875, 22.655437469482422, 24.61890411376953, 41.44171142578125, 66.4920883178711, 37.47478485107422, 28.518829345703125, 3.995424270629883], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000196.npy"} +{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 21.327590942382812, "std": 18.700523376464844, "min": -12.61541748046875, "p10": 1.5567043304443362, "median": 17.1305513381958, "p90": 45.08239860534669, "max": 72.75408172607422, "pos_frac": 0.921875, "sample": [-0.06728744506835938, 65.13185119628906, 32.67771911621094, 7.057502746582031, 33.68458557128906, -0.5529251098632812, 12.223106384277344, 5.32318115234375, 16.8144474029541, 64.00076293945312, 72.75408172607422, 34.277618408203125, 17.75033187866211, 17.837371826171875, 5.7279815673828125, 14.693340301513672, 6.172054290771484, 56.19330596923828, 33.53343200683594, 11.7686767578125, 31.577392578125, 37.06073760986328, 32.46651840209961, 9.47802734375, 14.80488395690918, 5.543205261230469, 3.7342376708984375, 24.169944763183594, 58.261192321777344, 37.3746337890625, 63.94685363769531, 13.292755126953125, 1.7228202819824219, 31.2642822265625, -1.501312255859375, 30.891525268554688, 7.8624420166015625, 5.9229278564453125, -2.2557830810546875, -12.61541748046875, 6.519983291625977, 24.64037322998047, 0.7426624298095703, 14.005203247070312, 14.221824645996094, 14.341079711914062, 40.00025939941406, 17.9898681640625, 24.318939208984375, 46.38277816772461, 1.4855117797851562, 33.53593826293945, 16.694488525390625, 7.789863586425781, 2.1962051391601562, 23.09026336669922, 17.4466552734375, 23.3041934967041, 42.048179626464844, 1.9155654907226562, 28.599609375, 23.394386291503906, 8.035400390625, 32.26344299316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000197.npy"} +{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 23.0856990814209, "std": 23.145610809326172, "min": -13.51214599609375, "p10": 1.9882308959960944, "median": 17.17600154876709, "p90": 46.567557907104494, "max": 120.1640625, "pos_frac": 0.90625, "sample": [-3.1290969848632812, -2.6984100341796875, 18.918527603149414, 74.05032348632812, 17.98473358154297, 32.01477813720703, 44.282440185546875, 12.27499008178711, 36.05767822265625, 29.979001998901367, 45.66180419921875, 25.993560791015625, -13.51214599609375, 19.309005737304688, 11.268913269042969, 17.010478973388672, 40.49408721923828, 40.05970764160156, 12.812301635742188, 21.925352096557617, 5.986789703369141, 4.605201721191406, 10.722909927368164, 14.106460571289062, 4.91187858581543, 13.12451171875, 11.30172348022461, 26.725635528564453, 13.388067245483398, 46.95573806762695, 23.01214599609375, 16.73487091064453, 14.133743286132812, 1.7079925537109375, 37.58038330078125, 4.330310821533203, 2.642120361328125, 15.392311096191406, 8.640289306640625, 57.33587646484375, -1.0650634765625, 86.68513488769531, 27.32823944091797, 13.505706787109375, 5.764045715332031, 21.044769287109375, 56.893043518066406, 9.564924240112305, 3.8649978637695312, 17.494657516479492, -2.3496971130371094, 21.7896728515625, 44.77707290649414, 45.08735656738281, 17.341524124145508, 36.824405670166016, 21.850860595703125, 14.660537719726562, -4.448141098022461, 70.96908569335938, 8.887413024902344, 120.1640625, 8.751859664916992, 18.00124740600586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000198.npy"} +{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 22.919063568115234, "std": 22.386255264282227, "min": -17.710159301757812, "p10": -0.4883747100830065, "median": 19.708515167236328, "p90": 55.64830169677735, "max": 95.28842163085938, "pos_frac": 0.890625, "sample": [19.50598907470703, 34.80284118652344, -12.758697509765625, 4.929779052734375, 53.68806457519531, 35.97700500488281, -5.936767578125, 39.47932434082031, -17.710159301757812, 44.89824676513672, 29.012985229492188, 10.611053466796875, 10.11962890625, 25.19634246826172, 65.63390350341797, 43.74987030029297, 5.95135498046875, 26.438987731933594, 62.32258605957031, 13.770833969116211, 3.07574462890625, 32.1140251159668, 44.06341552734375, 45.417205810546875, -9.152957916259766, 4.57403564453125, 13.689102172851562, 17.100692749023438, 40.7296142578125, -2.976104736328125, 2.9779205322265625, 56.4884033203125, 67.44345092773438, 12.522178649902344, 23.19849395751953, 57.41785430908203, 2.4239730834960938, 20.676204681396484, 3.2419357299804688, 45.86967849731445, 3.6879043579101562, 19.911041259765625, 27.18816375732422, 12.6783447265625, 58.821311950683594, 12.992141723632812, 21.121410369873047, 6.209939956665039, 2.6471900939941406, -6.587486267089844, 21.81256866455078, 12.440401077270508, 9.897232055664062, 24.15546417236328, 46.69117736816406, 28.693389892578125, 4.6307830810546875, 18.426902770996094, -1.0395622253417969, 12.435111999511719, 39.180511474609375, 24.161895751953125, 95.28842163085938, 0.7977294921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000199.npy"} +{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 24.642288208007812, "std": 25.758798599243164, "min": -45.71478271484375, "p10": -0.7799459457397457, "median": 18.634178161621094, "p90": 57.59874420166017, "max": 89.00363159179688, "pos_frac": 0.875, "sample": [55.38599395751953, 36.74909210205078, 21.207584381103516, 66.4996337890625, 8.997982025146484, 19.998863220214844, 5.001655578613281, 19.191530227661133, 81.5499267578125, 2.3145408630371094, 66.8203353881836, 52.91969299316406, 17.472503662109375, -5.951164245605469, 6.335824966430664, 58.54706573486328, 38.28456115722656, 65.67942810058594, 7.259033203125, 46.861114501953125, 5.228006362915039, 18.076826095581055, 49.93682098388672, 50.0208740234375, 3.2378463745117188, 16.630765914916992, 23.112030029296875, 4.8200225830078125, 88.68522644042969, 6.687232971191406, 23.191043853759766, -1.0814590454101562, 10.777603149414062, 1.27642822265625, 17.602455139160156, 19.234697341918945, -45.71478271484375, 28.530899047851562, 37.409889221191406, 89.00363159179688, 4.563632965087891, 33.47871398925781, 14.81207275390625, 17.829254150390625, -0.9134407043457031, -10.368057250976562, 21.777692794799805, 7.697216033935547, 44.176300048828125, -0.4684581756591797, 3.402618408203125, 51.73701477050781, 12.028732299804688, 15.301963806152344, 10.387191772460938, 51.09501647949219, 39.14970016479492, 29.17249298095703, 5.398536682128906, 23.880409240722656, -4.934467315673828, 53.851600646972656, -5.506805419921875, 41.766212463378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000200.npy"} +{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 25.56842803955078, "std": 23.333568572998047, "min": -19.69439697265625, "p10": -0.8998674392700188, "median": 22.801776885986328, "p90": 55.750619506835946, "max": 98.30783081054688, "pos_frac": 0.875, "sample": [58.60807800292969, 15.773675918579102, 48.94829559326172, -19.69439697265625, 11.195098876953125, 30.143173217773438, 10.33213996887207, 6.158271789550781, 3.999542236328125, 11.271026611328125, -1.73876953125, 25.263412475585938, 7.9031982421875, 24.429367065429688, 56.781494140625, 37.87615203857422, 33.76947021484375, 19.9295711517334, 8.919692993164062, 49.69775390625, 73.30030822753906, 66.9996337890625, 14.330192565917969, 0.6930694580078125, 24.185211181640625, 58.464134216308594, -10.439746856689453, 30.129257202148438, 28.908790588378906, 10.621011734008789, 36.58024978637695, 27.980331420898438, 40.7281494140625, 47.0592041015625, -9.68218994140625, -1.2422981262207031, 43.975677490234375, 9.285324096679688, 13.738044738769531, 83.78559875488281, 32.9034423828125, 9.985427856445312, 15.036911010742188, 17.78729248046875, 5.065614700317383, 41.00286865234375, 6.209175109863281, 15.234870910644531, 53.345245361328125, 19.87757110595703, 27.31500244140625, -4.7718048095703125, 18.864200592041016, 41.64557647705078, 0.3811187744140625, 42.49267578125, 98.30783081054688, 40.68260955810547, -0.10086250305175781, -3.053070068359375, 45.18634033203125, 23.46300506591797, 22.140548706054688, 38.41155242919922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000201.npy"} +{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 18.85692596435547, "std": 21.07270622253418, "min": -26.96721649169922, "p10": -3.544053268432616, "median": 16.259931564331055, "p90": 48.39544296264649, "max": 74.73672485351562, "pos_frac": 0.84375, "sample": [64.39994812011719, 7.04185676574707, 19.912689208984375, 42.52711486816406, 0.8142814636230469, 19.11175537109375, 39.10649871826172, 3.909463882446289, 35.32541275024414, 22.531347274780273, 7.765678405761719, -15.140884399414062, 20.41143035888672, 46.075416564941406, 26.71605682373047, 22.326465606689453, -22.90404510498047, 3.806060791015625, 19.17898941040039, 49.389739990234375, 15.557777404785156, 49.68357849121094, -5.508525848388672, 28.82086181640625, 5.720558166503906, 6.312522888183594, 74.73672485351562, 3.8804378509521484, 13.139829635620117, 51.38105010986328, 14.404251098632812, 35.900020599365234, 44.897483825683594, 31.407379150390625, 0.1091461181640625, 1.9028587341308594, 20.712112426757812, 27.314971923828125, 41.28877258300781, 51.0054931640625, 37.45759201049805, 0.5976028442382812, 0.6893138885498047, 42.561805725097656, 8.901405334472656, 44.39517593383789, 26.16988182067871, -6.989967346191406, 7.074909210205078, 9.2225341796875, 11.535274505615234, 6.850551605224609, -26.96721649169922, -1.7305450439453125, -0.1087188720703125, 16.962085723876953, 14.727251052856445, -4.0332183837890625, -2.402667999267578, 27.370031356811523, 53.56047821044922, -6.071514129638672, 21.30487060546875, 0.7937202453613281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000202.npy"} +{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 21.060503005981445, "std": 18.404932022094727, "min": -8.97296142578125, "p10": -2.929747390747069, "median": 16.981712341308594, "p90": 46.352630615234375, "max": 86.2940673828125, "pos_frac": 0.875, "sample": [-1.3587074279785156, 41.610904693603516, 49.390193939208984, 16.637718200683594, -3.6030502319335938, 45.66241455078125, 13.062458038330078, 12.062713623046875, 8.995744705200195, 33.6761474609375, 11.706245422363281, 42.66712951660156, 17.16368865966797, 16.79973602294922, -7.914882659912109, 28.888267517089844, 14.231216430664062, 6.206260681152344, 37.7144775390625, 13.173370361328125, 54.40605926513672, 54.595062255859375, 6.657909393310547, 11.564319610595703, 27.10613250732422, 17.54402732849121, 40.520606994628906, 10.6190185546875, -3.857818603515625, 14.278404235839844, -5.77435302734375, 26.44933319091797, 14.088104248046875, 46.71331024169922, 18.085006713867188, 30.695491790771484, 40.23229217529297, 22.893878936767578, 6.318634033203125, 4.6547698974609375, 13.392509460449219, 8.572364807128906, 19.200016021728516, 50.295013427734375, 3.921630859375, -3.6721649169921875, -4.344779968261719, 13.378059387207031, 1.0346660614013672, 9.375129699707031, 42.244041442871094, 19.913166046142578, 29.725513458251953, 18.037378311157227, 33.68553161621094, -8.97296142578125, 28.553619384765625, 46.6484375, 27.432037353515625, 86.2940673828125, 13.238550186157227, 30.337921142578125, 21.105573654174805, 13.914710998535156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000203.npy"} +{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 21.9324893951416, "std": 20.08610725402832, "min": -12.054153442382812, "p10": -0.22125968933105436, "median": 18.981603622436523, "p90": 47.944997787475586, "max": 81.93719482421875, "pos_frac": 0.890625, "sample": [19.098953247070312, 29.103748321533203, 5.135318756103516, 17.788896560668945, 36.587646484375, 18.533720016479492, 41.81060791015625, 1.15106201171875, 22.58075714111328, 13.76539421081543, -0.7730140686035156, 20.230222702026367, 4.488685607910156, 26.13836097717285, 24.32628631591797, 25.848865509033203, 23.458160400390625, 28.871482849121094, 16.329620361328125, 16.60837745666504, 61.77644348144531, 9.022552490234375, 1.7927093505859375, -3.8281116485595703, 18.08026123046875, 0.09885978698730469, 19.812942504882812, 49.039058685302734, 35.52814865112305, 7.3801727294921875, 78.15676879882812, 16.42369842529297, -3.9476776123046875, -0.35845375061035156, 8.538955688476562, 47.521183013916016, 9.278945922851562, 1.1389694213867188, 70.11549377441406, -12.054153442382812, 41.1241455078125, 48.12663269042969, 11.629188537597656, 24.761287689208984, 7.55450439453125, 20.20008659362793, 14.981033325195312, 24.597747802734375, 69.9738998413086, 12.687355041503906, 14.522773742675781, 34.00872039794922, -4.9302978515625, 23.010414123535156, 81.93719482421875, 35.60874938964844, 17.040712356567383, 19.3355770111084, 18.864253997802734, 20.219390869140625, -2.410400390625, 31.297212600708008, 30.81406021118164, 4.125152587890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000204.npy"} +{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 23.783849716186523, "std": 23.25178337097168, "min": -24.68829345703125, "p10": -2.732733154296875, "median": 19.641587257385254, "p90": 55.9523536682129, "max": 80.36676788330078, "pos_frac": 0.84375, "sample": [0.17007827758789062, 17.924057006835938, 10.679450988769531, 56.855308532714844, 3.091686248779297, 8.89033317565918, -2.0643138885498047, 11.298980712890625, 18.75536346435547, 29.1939697265625, 29.828262329101562, -18.0081787109375, -7.0324554443359375, 30.446306228637695, -2.7972412109375, 35.11137390136719, -2.287534713745117, 79.12289428710938, 52.70109558105469, 19.56545639038086, 38.06584167480469, 18.56842041015625, 1.41082763671875, 39.35188293457031, 39.873291015625, 26.22454833984375, 47.23399353027344, 13.921489715576172, 12.123878479003906, 38.886009216308594, -4.474494934082031, 80.36676788330078, 18.777976989746094, 7.426055908203125, 6.97564697265625, 22.295631408691406, 53.845458984375, 38.21461486816406, 19.21619415283203, 20.994415283203125, -24.68829345703125, 45.15989685058594, 65.61963653564453, 26.768592834472656, 12.581138610839844, 6.878353118896484, 17.561508178710938, 30.327110290527344, 59.69230651855469, 11.86297607421875, 27.09368896484375, 65.85368347167969, 46.023441314697266, 19.71771812438965, 33.95892333984375, 43.0970458984375, 36.14076232910156, 0.772064208984375, -2.58221435546875, -10.904182434082031, 7.959266662597656, 66.44898986816406, -8.152633666992188, 34.23320770263672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000205.npy"} +{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 27.303932189941406, "std": 21.4771785736084, "min": -9.468379974365234, "p10": -1.281593322753906, "median": 24.931851387023926, "p90": 57.68658752441407, "max": 98.02444458007812, "pos_frac": 0.875, "sample": [22.318931579589844, 38.427734375, 10.591621398925781, 21.94528579711914, 98.02444458007812, 54.695960998535156, 33.69879913330078, 46.901573181152344, 37.48332214355469, 37.45599365234375, 24.217559814453125, 8.128982543945312, 25.646142959594727, 10.994354248046875, -3.04876708984375, 19.858638763427734, 31.70287322998047, 9.524978637695312, 18.025508880615234, -7.31434440612793, 36.89130401611328, 7.293113708496094, 43.63853454589844, 49.268218994140625, 40.73927307128906, 31.46820068359375, 22.088165283203125, 58.968284606933594, 11.9071044921875, -5.0654449462890625, 50.115203857421875, 60.39935302734375, 34.68971252441406, 22.350357055664062, 11.054147720336914, 48.704925537109375, 21.620285034179688, -9.468379974365234, 61.65470886230469, 15.725051879882812, -1.0990524291992188, 28.924697875976562, 14.69204330444336, 27.341354370117188, 21.671031951904297, -4.526069641113281, 20.633785247802734, 30.75171661376953, 39.09967803955078, 20.41433334350586, -3.5865306854248047, -1.3598251342773438, 30.545127868652344, 8.023048400878906, 64.27775573730469, 35.58952331542969, 8.989103317260742, 37.320316314697266, 38.260047912597656, 62.94358825683594, 7.7255096435546875, 1.7290096282958984, 69.42153930664062, 36.34817123413086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000206.npy"} +{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 28.46685028076172, "std": 25.99673843383789, "min": -12.064399719238281, "p10": 3.2557680130004885, "median": 21.327754974365234, "p90": 67.30462341308596, "max": 97.75680541992188, "pos_frac": 0.9375, "sample": [3.4512901306152344, 3.6675491333007812, 16.610939025878906, 10.251483917236328, 40.621673583984375, 3.348196029663086, 69.87466430664062, 16.185810089111328, 3.216156005859375, 1.7978134155273438, 41.065948486328125, 1.8373489379882812, 53.19285583496094, -1.0713348388671875, 17.174850463867188, 8.195699691772461, 32.76698303222656, 43.22320556640625, 3.429555892944336, 6.880697250366211, 16.431808471679688, 52.483360290527344, 9.465164184570312, 43.84653854370117, 15.738258361816406, 18.1728515625, 42.2127571105957, 23.46033477783203, -2.5497817993164062, 8.975486755371094, 61.307861328125, 42.896575927734375, 7.5936431884765625, 5.269371032714844, 27.147632598876953, 58.749603271484375, 16.476043701171875, 35.904083251953125, 7.606906890869141, 26.088394165039062, 53.897789001464844, 45.62535095214844, 7.8017730712890625, 53.73493576049805, 26.498863220214844, 26.74042510986328, 15.116676330566406, 71.39747619628906, 95.02377319335938, 43.505767822265625, 18.58758544921875, 93.55014038085938, -6.475175857543945, 30.020429611206055, 97.75680541992188, 24.552200317382812, 6.379911422729492, 29.306602478027344, 4.4890289306640625, 78.29476928710938, -12.064399719238281, 32.65777587890625, 73.28643798828125, 19.195175170898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000207.npy"} +{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 23.237592697143555, "std": 20.613441467285156, "min": -7.67413330078125, "p10": 0.8943748474121103, "median": 18.497854232788086, "p90": 54.434728240966805, "max": 98.78385925292969, "pos_frac": 0.921875, "sample": [18.065196990966797, 19.7159423828125, -2.3447723388671875, 21.87602996826172, 8.821979522705078, 12.097576141357422, 38.81108093261719, 8.696023941040039, 35.87684631347656, 39.126712799072266, 47.99161911010742, 18.930511474609375, 14.606212615966797, 38.03996276855469, 55.182533264160156, 5.950935363769531, 98.78385925292969, 11.518486022949219, 60.10539245605469, 57.44891357421875, 7.863311767578125, 33.506996154785156, 39.640228271484375, 25.303077697753906, 27.609268188476562, 10.157981872558594, 14.123794555664062, -6.1474609375, -7.67413330078125, 45.451141357421875, -0.8304786682128906, 24.610715866088867, 6.521564483642578, 16.98505401611328, 22.003173828125, 35.811920166015625, 68.53941345214844, 7.576831817626953, 12.174064636230469, 33.2789306640625, 21.85363006591797, 8.468055725097656, 8.527572631835938, 35.07832336425781, 15.246231079101562, 52.689849853515625, 21.609434127807617, 0.478302001953125, 3.5599822998046875, 24.99560546875, -0.3373565673828125, 14.044082641601562, 55.682518005371094, 19.935836791992188, 17.68206024169922, 0.5265960693359375, 4.917163848876953, 60.230628967285156, 37.7587890625, 8.9154052734375, 33.731056213378906, 4.31364631652832, 9.739601135253906, 1.7525253295898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000208.npy"} +{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 23.724063873291016, "std": 22.29418182373047, "min": -19.534828186035156, "p10": -4.599222564697265, "median": 19.157148361206055, "p90": 53.52809524536134, "max": 80.18209838867188, "pos_frac": 0.859375, "sample": [57.159637451171875, 26.938411712646484, 40.286617279052734, 2.3531417846679688, 46.818023681640625, 66.6576919555664, 66.79277038574219, 6.067966461181641, 18.85785675048828, 14.274284362792969, 7.189678192138672, 11.145633697509766, 42.25061798095703, 2.9891128540039062, 49.469871520996094, 36.838172912597656, 60.063758850097656, -9.038398742675781, 14.916557312011719, 48.20437240600586, 44.93213653564453, 14.37335205078125, 6.594417572021484, -12.60882568359375, 38.058433532714844, -3.8137893676757812, 2.512788772583008, 22.02625274658203, 7.334716796875, 48.99870681762695, 15.503204345703125, 29.286636352539062, -12.438636779785156, 29.695674896240234, 18.47458267211914, 19.597564697265625, -0.32404136657714844, 19.13845443725586, 80.18209838867188, 43.87178039550781, 7.125251770019531, 55.267333984375, 4.927215576171875, -7.3408355712890625, -4.9358367919921875, 6.416160583496094, 36.83319091796875, 23.56314468383789, 18.258102416992188, 43.592689514160156, 32.317718505859375, 21.781801223754883, 37.91387939453125, -19.534828186035156, 32.128326416015625, 13.790191650390625, 18.267581939697266, 13.707794189453125, 19.17584228515625, -5.3484344482421875, 48.255950927734375, 65.08277130126953, 21.185028076171875, 14.278755187988281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000209.npy"} +{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 22.228591918945312, "std": 19.132234573364258, "min": -10.617630004882812, "p10": 2.1065004348754885, "median": 18.057398796081543, "p90": 43.76594123840332, "max": 74.00839233398438, "pos_frac": 0.953125, "sample": [4.820289611816406, 7.968149185180664, 30.751220703125, 35.312686920166016, 19.381027221679688, 38.74726486206055, 43.867584228515625, -1.5572052001953125, 12.676773071289062, -6.505546569824219, 43.52877426147461, 1.102752685546875, 3.659832000732422, -10.617630004882812, 66.77224731445312, 13.093399047851562, 6.8753509521484375, 14.904953002929688, 7.210334777832031, 17.325111389160156, 39.32856750488281, 40.025840759277344, 22.62070655822754, 40.28019714355469, 4.298789978027344, 1.7796134948730469, 2.2794857025146484, 25.5257568359375, 34.20147705078125, 51.69027328491211, 18.78968620300293, 27.451385498046875, 40.62158966064453, 40.99481201171875, 9.563133239746094, 2.0323638916015625, 9.86776351928711, 15.891799926757812, 13.250356674194336, 19.072734832763672, 25.1839542388916, 36.39427185058594, 31.238174438476562, 8.256969451904297, 4.5868682861328125, 3.851806640625, 41.506500244140625, 16.470993041992188, 33.91964340209961, 15.370742797851562, 66.81834411621094, 74.00839233398438, 32.133644104003906, 27.108671188354492, 22.275611877441406, 8.204421997070312, 28.95836639404297, 6.443836212158203, 4.34004020690918, 64.62344360351562, 2.2909698486328125, 1.0330276489257812, 9.787628173828125, 48.93989562988281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000210.npy"} +{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 22.822021484375, "std": 21.04338264465332, "min": -16.61715316772461, "p10": -4.273029327392574, "median": 19.454721450805664, "p90": 51.762976074218756, "max": 73.98892211914062, "pos_frac": 0.890625, "sample": [17.28190040588379, 11.604082107543945, 57.046630859375, 35.440284729003906, 25.66695785522461, 11.733932495117188, 24.71820831298828, 32.816978454589844, 73.98892211914062, 15.47576904296875, 32.57135772705078, -14.773408889770508, 5.146472930908203, 11.764724731445312, 27.067916870117188, 52.56324768066406, 38.36468505859375, 12.891815185546875, 5.2969207763671875, 37.98844909667969, 12.047866821289062, 2.6530914306640625, 18.283981323242188, 26.518556594848633, 11.091522216796875, 0.34659576416015625, 6.844596862792969, 59.931182861328125, 16.18527603149414, 58.642547607421875, 71.76571655273438, 10.819046020507812, 13.528812408447266, 21.70386505126953, 36.13032531738281, 18.97442626953125, 6.0611572265625, 46.70914077758789, 27.820838928222656, 25.447479248046875, 47.29210662841797, 30.237533569335938, -11.123844146728516, 2.9927024841308594, 8.453523635864258, 34.82849884033203, 3.7731761932373047, -16.61715316772461, -10.457893371582031, 66.868896484375, -8.452438354492188, 27.71280860900879, 31.771278381347656, 34.17957305908203, 43.377525329589844, 49.89567565917969, 17.608787536621094, 35.25508117675781, -12.402870178222656, 33.405113220214844, 19.935016632080078, 16.634841918945312, 15.532546997070312, -6.25286865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000211.npy"} +{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 28.72591781616211, "std": 25.697410583496094, "min": -9.192626953125, "p10": 0.007090568542482134, "median": 23.664461135864258, "p90": 61.627748107910165, "max": 103.32574462890625, "pos_frac": 0.890625, "sample": [7.720603942871094, -2.803741455078125, 91.60421752929688, 28.564559936523438, 7.797340393066406, 45.963592529296875, 10.032058715820312, 18.952682495117188, 12.678606033325195, 12.341398239135742, 47.07969665527344, 11.148521423339844, 8.222785949707031, -9.192626953125, -3.0339622497558594, 19.499923706054688, 43.775360107421875, 72.10491180419922, 38.36531066894531, 37.94976806640625, 15.33551025390625, -8.233406066894531, 23.55819320678711, 55.472930908203125, 19.423179626464844, 29.58100128173828, 64.13411712646484, 6.500213623046875, 56.48175048828125, -3.1865787506103516, 11.784126281738281, 16.226303100585938, 103.32574462890625, 23.912002563476562, 5.834270477294922, 7.226715087890625, 31.181564331054688, 21.70366668701172, 38.79594421386719, 99.27674865722656, 47.128265380859375, 28.25457000732422, 27.570650100708008, 23.770729064941406, 15.629119873046875, 22.616729736328125, -0.6869964599609375, 1.626626968383789, 62.41999053955078, 7.503574371337891, 24.532665252685547, 55.20976257324219, 51.086029052734375, 27.513565063476562, 68.3786849975586, -1.3311805725097656, 34.35552978515625, 12.910587310791016, 59.77918243408203, 50.46044158935547, 2.6080703735351562, 44.067832946777344, 4.22216796875, 51.72705078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000212.npy"} +{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 29.10615348815918, "std": 26.227977752685547, "min": -27.288284301757812, "p10": 2.064271545410157, "median": 25.437053680419922, "p90": 69.36406173706055, "max": 102.19317626953125, "pos_frac": 0.9375, "sample": [88.38034057617188, 7.12652587890625, 9.7425537109375, 2.827394485473633, 12.055206298828125, 2.91796875, 7.624574661254883, 45.24403762817383, -27.288284301757812, -3.882537841796875, 1.4646625518798828, 38.3516845703125, 35.019203186035156, -7.084037780761719, 27.461692810058594, 27.351699829101562, 1.7372188568115234, 24.848358154296875, 59.752708435058594, 46.70036315917969, 84.92529296875, 39.51116943359375, 21.5814266204834, 78.6174545288086, 20.117374420166016, 55.98652648925781, 66.36228942871094, 4.166353225708008, 9.929618835449219, 4.563020706176758, 29.507980346679688, 16.983917236328125, 26.02574920654297, 31.076433181762695, 28.889694213867188, 36.12648010253906, 20.345657348632812, 45.62828063964844, 70.6505355834961, 41.650840759277344, 12.763343811035156, 6.9800567626953125, 102.19317626953125, 6.89349365234375, 10.117935180664062, 34.20819854736328, 55.10930633544922, 74.80581665039062, 23.859619140625, 37.506805419921875, 35.08381652832031, 80.54850769042969, 48.525054931640625, 8.681072235107422, 0.3339080810546875, -0.23764991760253906, 15.519218444824219, 37.69529724121094, 10.297321319580078, 4.245208740234375, 19.142047882080078, 20.192672729492188, 47.11870574951172, 38.21340560913086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000213.npy"} +{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 23.016742706298828, "std": 24.110767364501953, "min": -30.781166076660156, "p10": 1.0703193664550792, "median": 15.336590766906738, "p90": 58.44501876831055, "max": 88.16409301757812, "pos_frac": 0.90625, "sample": [12.370285034179688, 60.33439636230469, 2.423259735107422, 7.337879180908203, 6.963617324829102, 2.627685546875, 6.352222442626953, -30.781166076660156, 57.476234436035156, 11.648576736450195, 51.83263397216797, 36.40279006958008, 23.09241485595703, -0.6521453857421875, 28.153823852539062, 14.093189239501953, 7.36505126953125, 2.3441123962402344, 6.177177429199219, 30.585670471191406, 0.6538848876953125, 12.168067932128906, 25.60663604736328, 16.325439453125, 56.61018371582031, 23.105382919311523, 25.228782653808594, 4.460386276245117, 6.341938018798828, 2.0419998168945312, 20.663333892822266, 19.382354736328125, 28.204994201660156, 68.58673095703125, 10.592315673828125, 88.16409301757812, 58.65129852294922, 25.66961669921875, -0.076324462890625, 57.87611389160156, 21.47802734375, 23.62071990966797, 57.96369934082031, 73.54167175292969, -15.701131820678711, 61.424224853515625, 9.23353385925293, 19.54711151123047, 55.22734069824219, 24.72219467163086, 10.55389404296875, 14.562347412109375, 5.872688293457031, 7.742588043212891, 14.405105590820312, 36.637664794921875, 13.370159149169922, 55.05153274536133, 16.1108341217041, 8.587936401367188, 6.794147491455078, 74.34907531738281, -5.525337219238281, -2.903423309326172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000214.npy"} +{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 24.892501831054688, "std": 23.99720573425293, "min": -20.050613403320312, "p10": 2.07691879272461, "median": 18.657175064086914, "p90": 60.667986297607435, "max": 96.96463012695312, "pos_frac": 0.90625, "sample": [19.330684661865234, 63.46508026123047, 36.71075439453125, 9.799690246582031, -0.36049652099609375, 4.618255615234375, -3.6843338012695312, 43.423667907714844, 31.292068481445312, 17.983665466308594, 19.809368133544922, 10.893369674682617, 7.961282730102539, 17.775407791137695, 79.54534912109375, 35.153221130371094, 29.70209503173828, 50.63105773925781, 92.05873107910156, 31.436166763305664, 2.6168670654296875, 31.198394775390625, 20.02745819091797, 3.6434326171875, 13.42730712890625, 14.035301208496094, -0.2033977508544922, 15.263191223144531, 57.6002311706543, 11.02923583984375, 4.889247894287109, 17.02964210510254, 13.14498519897461, 17.359880447387695, 61.98273849487305, 27.022790908813477, 1.8455123901367188, 53.33233642578125, 42.726470947265625, 22.926223754882812, 67.0540542602539, 25.437400817871094, 26.795459747314453, 17.528213500976562, 66.73684692382812, 15.611114501953125, 35.389427185058594, 5.185829162597656, 12.765087127685547, 50.02577209472656, -20.050613403320312, 34.33409881591797, 96.96463012695312, 7.425647735595703, 22.8717041015625, -13.054656982421875, 42.7899169921875, 6.066192626953125, 4.733577728271484, 24.959867477416992, 27.937063217163086, 4.363811492919922, -9.055063247680664, 11.861770629882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000215.npy"} +{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 26.958616256713867, "std": 26.480247497558594, "min": -17.63250732421875, "p10": -0.8274059295654292, "median": 23.573158264160156, "p90": 67.80697326660157, "max": 91.19178771972656, "pos_frac": 0.875, "sample": [35.76905822753906, 33.68310546875, -12.441654205322266, 32.21778106689453, 31.8660888671875, 16.148340225219727, -0.9903068542480469, 24.74536895751953, 49.34698486328125, 1.7139816284179688, 91.19178771972656, 3.5245513916015625, 32.903934478759766, -6.4858245849609375, 10.42233657836914, 10.422409057617188, 1.993825912475586, 45.4633903503418, 5.148731231689453, -9.50836181640625, 6.162773132324219, 73.5987548828125, 6.9838104248046875, 38.83204650878906, 61.45287322998047, 22.712982177734375, 5.0842132568359375, 46.646907806396484, 57.23625564575195, 28.36760139465332, 30.096649169921875, 31.8194580078125, 24.433334350585938, 3.723329544067383, 69.14627075195312, 30.046844482421875, -17.63250732421875, 79.35774230957031, 5.479240417480469, 15.950386047363281, 3.249876022338867, 36.143096923828125, 13.185955047607422, -0.44730377197265625, 21.289901733398438, 28.8001708984375, 59.28229522705078, 79.54502868652344, 11.703033447265625, 1.449066162109375, 70.58522033691406, 16.027992248535156, 3.2765655517578125, -1.0146598815917969, 11.528009414672852, 55.87567138671875, 62.079505920410156, 40.20903015136719, 64.68194580078125, 78.29891967773438, 15.992813110351562, 0.6772098541259766, -3.5839061737060547, 39.881553649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000216.npy"} +{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 27.929889678955078, "std": 21.522830963134766, "min": -17.742835998535156, "p10": 3.8765695571899417, "median": 27.046737670898438, "p90": 52.28611679077149, "max": 85.60844421386719, "pos_frac": 0.921875, "sample": [20.89208984375, 29.249290466308594, 29.294723510742188, 3.812681198120117, 7.85546875, 12.588241577148438, 22.343229293823242, 43.48637390136719, 28.4556884765625, 13.176116943359375, -0.5269012451171875, 77.51924133300781, 47.089927673339844, 39.576385498046875, -10.445777893066406, 15.933971405029297, 19.778736114501953, -17.742835998535156, 33.21563720703125, 83.07131958007812, -2.217254638671875, 18.082612991333008, 20.37531280517578, 27.846954345703125, 52.33399200439453, 32.896942138671875, 28.722579956054688, 4.025642395019531, 7.1475067138671875, 33.67303466796875, 85.60844421386719, 41.126792907714844, 7.432275772094727, 16.561077117919922, 48.24644470214844, 25.811038970947266, 26.24652099609375, 52.489051818847656, 17.474559783935547, 51.49058532714844, 44.3406982421875, 2.0224075317382812, 56.57969665527344, 42.14141845703125, 22.585041046142578, 52.174407958984375, 14.599136352539062, 35.98456573486328, 36.149070739746094, 32.498382568359375, 30.240951538085938, 35.929954528808594, 81.65815734863281, 24.199533462524414, 42.982669830322266, 8.452293395996094, 10.190689086914062, 9.678947448730469, 11.306541442871094, 10.403060913085938, 18.086715698242188, 36.06449890136719, 36.62120056152344, -1.3749198913574219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000217.npy"} +{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 24.937423706054688, "std": 24.085628509521484, "min": -10.532302856445312, "p10": 0.6269290924072269, "median": 19.347110748291016, "p90": 54.90351028442384, "max": 114.82046508789062, "pos_frac": 0.90625, "sample": [24.881786346435547, 12.814323425292969, 17.51386260986328, 19.630489349365234, 41.15424346923828, 21.14805793762207, 77.85220336914062, 2.7710494995117188, 8.155097961425781, 19.099990844726562, 52.32189178466797, -3.8540267944335938, 12.088459014892578, 47.81861114501953, 71.12263488769531, 12.52484130859375, 25.923553466796875, 9.631534576416016, 5.364097595214844, 29.640159606933594, 9.812789916992188, 19.59423065185547, 9.313312530517578, 41.339691162109375, 7.299995422363281, -8.39739990234375, 50.600074768066406, 45.72340393066406, 33.379249572753906, 39.896446228027344, 14.244218826293945, 9.21575927734375, 23.631744384765625, 51.39904022216797, 39.54937744140625, 43.2432861328125, 6.157098770141602, 0.4721488952636719, 29.160306930541992, 3.180675506591797, 19.077335357666016, -10.532302856445312, 56.009918212890625, 8.938884735107422, -1.6817092895507812, -6.0142059326171875, 114.82046508789062, 51.915740966796875, 43.00019836425781, 13.814079284667969, 3.6641387939453125, 58.6177978515625, 60.65142822265625, 1.4807682037353516, 10.488201141357422, 34.20447540283203, 66.95352935791016, 28.42537498474121, 6.48307991027832, -9.701667785644531, 16.071929931640625, 0.9880828857421875, 31.788238525390625, 20.113014221191406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000218.npy"} +{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 22.00741958618164, "std": 19.767765045166016, "min": -14.735198974609375, "p10": 1.2967185974121098, "median": 17.557645797729492, "p90": 57.9971923828125, "max": 69.06327819824219, "pos_frac": 0.9375, "sample": [69.06327819824219, 26.962175369262695, 11.994518280029297, 62.32878875732422, 8.515775680541992, 57.53736877441406, -14.735198974609375, 3.365720748901367, 1.131591796875, 35.571075439453125, 11.797792434692383, 16.88451385498047, 29.077835083007812, 68.71771240234375, 62.549537658691406, 14.639543533325195, 0.13486480712890625, 58.19425964355469, 48.011962890625, 26.508193969726562, 16.81304168701172, 7.05224609375, -13.217737197875977, 17.626827239990234, 29.64806365966797, 6.692008972167969, 25.929054260253906, 8.220829010009766, 59.63377380371094, 23.643531799316406, 13.540882110595703, 62.886260986328125, 17.9219970703125, 7.653547286987305, 18.087615966796875, 37.68799591064453, 31.78461456298828, 1.6820144653320312, 24.848976135253906, 9.841583251953125, 33.97310256958008, 15.305976867675781, 30.42066192626953, 6.568115234375, 11.68310546875, 17.48846435546875, 11.410369873046875, 9.651079177856445, 2.4006423950195312, 39.803009033203125, 17.950042724609375, 29.0167236328125, 10.459953308105469, -3.5826759338378906, -3.7217254638671875, 41.880401611328125, 20.727493286132812, 28.536598205566406, 15.833698272705078, 0.441986083984375, 8.305633544921875, 25.172584533691406, 23.554166793823242, 8.966920852661133], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000219.npy"} +{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 20.889972686767578, "std": 17.149866104125977, "min": -4.3613433837890625, "p10": -0.41230125427246034, "median": 17.606765747070312, "p90": 47.81003112792969, "max": 59.024261474609375, "pos_frac": 0.890625, "sample": [55.46270751953125, 6.541515350341797, 16.49730110168457, 2.1822128295898438, 26.402650833129883, 47.652366638183594, 59.024261474609375, -0.8940353393554688, 28.82701873779297, 28.482521057128906, 21.94068145751953, 11.300010681152344, 45.641387939453125, 10.941566467285156, -0.6484489440917969, 13.304168701171875, 58.58219909667969, 27.20220375061035, 33.837242126464844, 36.5300407409668, 43.852874755859375, 2.8043441772460938, 13.140266418457031, 10.432754516601562, -4.3613433837890625, 47.877601623535156, 11.997079849243164, 16.858535766601562, 52.0391845703125, 20.803112030029297, 28.385391235351562, 18.354995727539062, 32.805233001708984, 10.589698791503906, 31.623794555664062, 28.79942512512207, 19.789993286132812, 54.43290710449219, 7.1971435546875, 13.02728271484375, 1.2664775848388672, 3.7135696411132812, 0.13871002197265625, 14.801437377929688, 3.7013187408447266, -1.8406829833984375, 36.958526611328125, 12.590774536132812, 1.5791969299316406, 14.869743347167969, 8.616508483886719, 21.191497802734375, 51.45935821533203, -2.181917190551758, 20.314151763916016, 36.4516716003418, 20.773120880126953, 27.09947967529297, 21.766326904296875, 12.116424560546875, 15.070323944091797, 29.311386108398438, -0.878143310546875, -1.190948486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000220.npy"} +{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 32.81925582885742, "std": 22.986146926879883, "min": -12.770103454589844, "p10": 5.0504592895507825, "median": 29.558914184570312, "p90": 64.3977119445801, "max": 87.86102294921875, "pos_frac": 0.953125, "sample": [25.850967407226562, 16.09912109375, 29.398147583007812, 8.696676254272461, 30.889493942260742, 87.86102294921875, 24.9345703125, 84.06724548339844, 42.63178253173828, 59.83679962158203, 66.00720977783203, 0.9115409851074219, 68.92564392089844, 13.6240234375, 27.313753128051758, 53.84666442871094, 8.179229736328125, 58.037689208984375, 18.642065048217773, 47.88336944580078, 53.05713653564453, 16.126907348632812, 37.291954040527344, 71.41612243652344, 34.61804962158203, 54.967018127441406, 52.98655700683594, -8.215850830078125, 17.940261840820312, 11.159614562988281, 29.719680786132812, 28.05913543701172, 4.664058685302734, 28.59119415283203, 26.816360473632812, 30.905624389648438, 13.869760513305664, -12.770103454589844, 28.733169555664062, 55.62297058105469, 5.947601318359375, 65.48956298828125, 41.95037078857422, 33.11865234375, 35.250099182128906, 29.96288299560547, 14.806365966796875, 77.92547607421875, 28.0970458984375, 33.94647216796875, 22.871679306030273, -12.437524795532227, 32.88641357421875, 54.75917053222656, 4.6659698486328125, 12.144298553466797, 23.580408096313477, 43.25206756591797, 16.778762817382812, 26.755996704101562, 61.850059509277344, 4.056631088256836, 61.17559814453125, 32.40168762207031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000221.npy"} +{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 26.146482467651367, "std": 25.172771453857422, "min": -27.625926971435547, "p10": 0.5465541839599618, "median": 20.362693786621094, "p90": 63.852415084838874, "max": 105.46268463134766, "pos_frac": 0.90625, "sample": [4.1250457763671875, 53.767181396484375, 19.089599609375, 11.364892959594727, 7.063117980957031, 20.12663459777832, 6.02337646484375, 5.863006591796875, 12.300384521484375, 2.070831298828125, 20.55579376220703, 40.62813186645508, 105.46268463134766, 19.507030487060547, 41.42816162109375, 17.562965393066406, -8.768516540527344, 19.426673889160156, 59.77168273925781, 21.164443969726562, 36.191864013671875, 24.283470153808594, 7.9422607421875, 67.136474609375, -5.312553405761719, 29.610923767089844, 79.89579772949219, 18.504051208496094, 45.10877227783203, 3.9720497131347656, 46.416404724121094, 2.7089996337890625, 62.08864974975586, -0.6708469390869141, 27.154373168945312, 39.39112854003906, 3.9000492095947266, 26.56795310974121, 0.18031692504882812, 27.6466064453125, 1.4011077880859375, 31.341781616210938, -14.701560974121094, -6.8629913330078125, 77.08380126953125, 16.37323760986328, 38.032630920410156, 64.60831451416016, 11.217931747436523, 22.615615844726562, 51.503456115722656, 41.35834503173828, 66.32760620117188, 17.85599136352539, 2.201873779296875, 25.324745178222656, 37.34077453613281, -27.625926971435547, 31.58203125, 20.169593811035156, 44.113555908203125, 19.19940185546875, 68.69171142578125, 12.972038269042969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000222.npy"} +{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 22.292381286621094, "std": 22.9637451171875, "min": -23.31121063232422, "p10": -6.359392738342283, "median": 18.846364974975586, "p90": 51.67638244628906, "max": 77.1607666015625, "pos_frac": 0.828125, "sample": [10.247116088867188, -22.246612548828125, 33.32939910888672, 22.365066528320312, -1.5476112365722656, 29.115188598632812, 53.67364501953125, 15.507705688476562, 24.495254516601562, 43.38078308105469, 11.516695022583008, 8.7447509765625, 15.275222778320312, 38.360382080078125, 62.16981506347656, 38.066986083984375, 18.91522979736328, 23.47964096069336, 77.1607666015625, 6.289157867431641, 38.902748107910156, -7.420648574829102, 36.33232879638672, -22.549274444580078, 12.484539031982422, 50.2254638671875, -0.6914100646972656, 51.822601318359375, 44.374786376953125, -14.826454162597656, 8.957656860351562, 5.721866607666016, 56.83622741699219, -17.584564208984375, 31.227588653564453, 49.3392333984375, 2.261688232421875, 29.56853485107422, 18.77750015258789, 71.64593505859375, 49.83423614501953, -3.883129119873047, 15.119871139526367, 18.531341552734375, 10.6959228515625, 11.136104583740234, 36.17332458496094, 51.335205078125, 13.667823791503906, 11.280242919921875, 24.635847091674805, -0.17233848571777344, -12.238067626953125, 55.4246826171875, 15.291946411132812, 34.3952751159668, 4.785234451293945, 11.275251388549805, 13.870292663574219, 41.989410400390625, 27.426435470581055, 30.999420166015625, -23.31121063232422, 34.74440002441406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000223.npy"} +{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 26.778461456298828, "std": 25.32072639465332, "min": -29.02960968017578, "p10": 0.05366420745849622, "median": 21.03948402404785, "p90": 66.3917678833008, "max": 86.04985046386719, "pos_frac": 0.90625, "sample": [38.67302703857422, 7.130523681640625, 10.546218872070312, 10.187271118164062, -0.5490455627441406, 57.44561004638672, 33.11815643310547, 7.55134391784668, 22.600143432617188, -29.02960968017578, 35.12175750732422, 74.7192611694336, 57.207645416259766, 19.822839736938477, 73.25634765625, 16.425819396972656, 31.963882446289062, 51.71648406982422, 30.473445892333984, 86.04985046386719, 60.04676818847656, 28.290191650390625, 42.08972930908203, 53.660865783691406, 13.705123901367188, -10.752376556396484, 29.22191619873047, 24.860605239868164, 47.499916076660156, 0.23942947387695312, 11.898162841796875, 17.871742248535156, 17.001033782958984, 71.22137451171875, 38.45256042480469, 45.41845703125, 16.86627960205078, 1.1301956176757812, 10.378105163574219, 21.042308807373047, 51.4593391418457, -6.318572998046875, 5.930961608886719, 0.0013675689697265625, 21.535707473754883, -14.62397575378418, 10.834442138671875, 8.075721740722656, 6.865119934082031, 20.584909439086914, 28.487342834472656, 48.63492965698242, 78.2537841796875, 13.218042373657227, 4.778425216674805, 10.35042953491211, 21.036659240722656, 33.461875915527344, 13.676738739013672, 0.175689697265625, 35.38917541503906, 80.48797607421875, 69.11105346679688, -2.159008026123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000224.npy"} +{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 32.95307922363281, "std": 26.78766632080078, "min": -11.536447525024414, "p10": 1.7362293243408218, "median": 29.352563858032227, "p90": 57.44227752685548, "max": 129.77105712890625, "pos_frac": 0.921875, "sample": [68.2926025390625, 48.717559814453125, 64.57060241699219, 83.85316467285156, 3.231609344482422, 11.008926391601562, 4.228340148925781, 22.7686767578125, 31.4437255859375, 53.069305419921875, 29.003883361816406, 52.29090118408203, 1.0953521728515625, 3.9480667114257812, 52.260009765625, 15.379268646240234, -11.536447525024414, 10.035085678100586, 0.3580818176269531, 9.750785827636719, 36.61107635498047, 35.51648712158203, 41.85675811767578, 55.04022979736328, 3.6606197357177734, 6.779655456542969, 55.386016845703125, 21.123451232910156, 28.48157501220703, 22.02393341064453, 82.06373596191406, 40.48116683959961, 52.96049499511719, 24.84674072265625, -9.013656616210938, 95.07522583007812, 44.560638427734375, -4.661979675292969, 53.72504425048828, 52.883148193359375, 29.326374053955078, 45.09394836425781, 12.853645324707031, 3.428800582885742, 48.97346496582031, 15.794242858886719, 129.77105712890625, 37.99873352050781, 34.5113525390625, 25.208038330078125, 55.03532409667969, 27.387603759765625, 49.470420837402344, 14.496978759765625, 9.764598846435547, 58.32353210449219, 45.95062255859375, 28.316818237304688, 43.838966369628906, -4.584930419921875, 25.41686248779297, 53.16542434692383, -3.093414306640625, 29.378753662109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000225.npy"} +{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 25.563575744628906, "std": 22.32744026184082, "min": -19.016036987304688, "p10": 2.3221681594848635, "median": 19.539816856384277, "p90": 59.095331573486334, "max": 87.50688171386719, "pos_frac": 0.90625, "sample": [87.50688171386719, -5.5353546142578125, 4.926792144775391, 32.05493927001953, 2.604795455932617, 55.26925277709961, 14.431015014648438, 29.662033081054688, 19.743553161621094, 2.2010421752929688, 35.739158630371094, 7.098320007324219, -19.016036987304688, 7.673698425292969, -1.8499755859375, 34.37754440307617, 16.221378326416016, 24.7567138671875, 43.89540100097656, 11.98797607421875, 40.81626892089844, 65.54747009277344, 25.614181518554688, 28.97394561767578, 6.8004302978515625, 44.01776123046875, 19.564064025878906, 24.594680786132812, 19.120376586914062, 61.76673889160156, 33.58465576171875, 7.571466445922852, 40.666053771972656, 26.46092414855957, 20.1420841217041, 53.174774169921875, 55.21435546875, 65.17533874511719, 59.63123321533203, -1.0248565673828125, 7.6901397705078125, 7.095359802246094, -5.460002899169922, 17.514984130859375, 14.172744750976562, -4.694549560546875, 16.20677947998047, 8.787178039550781, 18.351207733154297, 18.575706481933594, 9.126007080078125, 69.48262023925781, 27.914871215820312, 36.996192932128906, 19.51556968688965, 47.40106201171875, 12.78363037109375, 3.964609146118164, 76.24415588378906, 15.22576904296875, 14.839027404785156, 14.497346878051758, 30.832382202148438, 57.84489440917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000226.npy"} +{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 31.536819458007812, "std": 26.311187744140625, "min": -12.12961196899414, "p10": 0.24247131347656325, "median": 24.61126136779785, "p90": 68.75740356445313, "max": 100.32586669921875, "pos_frac": 0.890625, "sample": [82.7899169921875, 18.978195190429688, 22.66425323486328, 78.14998626708984, 38.230140686035156, 38.46428680419922, 42.322662353515625, 47.58306884765625, 40.715797424316406, 2.0978660583496094, 11.890853881835938, 36.270050048828125, 22.86896514892578, -1.5636825561523438, -1.0010948181152344, 14.858314514160156, 49.99744415283203, 8.156150817871094, 37.06141662597656, 100.32586669921875, 36.20649719238281, 17.790874481201172, 15.617008209228516, 13.47109603881836, 22.222965240478516, 50.36415100097656, 59.039817810058594, 35.335044860839844, 37.01271057128906, 27.426136016845703, 38.951454162597656, 52.37683868408203, 42.61029052734375, 2.6134109497070312, -3.7937774658203125, 3.364349365234375, 18.633193969726562, 47.2579345703125, 66.43878173828125, 24.521697998046875, 24.700824737548828, 69.7510986328125, -12.12961196899414, 98.65560913085938, 8.635053634643555, 17.984817504882812, 24.195905685424805, 15.316558837890625, 36.95960235595703, 19.838821411132812, 30.902976989746094, 81.77389526367188, 87.0303955078125, 18.228683471679688, -2.5243988037109375, -0.08241844177246094, 59.15348815917969, -9.579742431640625, 1.0005474090576172, 16.444076538085938, 13.302820205688477, 19.517911911010742, 40.32093048095703, 60.637603759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000227.npy"} +{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 24.27182960510254, "std": 20.918087005615234, "min": -19.869964599609375, "p10": -1.385177993774413, "median": 23.772400856018066, "p90": 52.129859924316406, "max": 77.79234313964844, "pos_frac": 0.875, "sample": [10.281929016113281, -19.869964599609375, -4.473785400390625, 27.381729125976562, 36.12739562988281, 77.79234313964844, -9.570411682128906, 21.372867584228516, 13.471792221069336, -1.8542518615722656, 7.319915771484375, 21.25958251953125, 52.30987548828125, 35.08483123779297, 64.10169982910156, 70.59597778320312, 58.33555603027344, 24.702198028564453, 31.49262809753418, 10.918960571289062, -6.506675720214844, 43.76287078857422, 18.055885314941406, 26.029067993164062, 38.55510711669922, 13.511505126953125, 26.660934448242188, 9.291830062866211, -0.29067230224609375, 43.20137023925781, 48.216773986816406, 17.241233825683594, 5.872919082641602, 55.111297607421875, 35.1463623046875, 10.385489463806152, 10.292510986328125, 17.23284149169922, 3.71551513671875, 38.068634033203125, 23.469552993774414, 24.07524871826172, 27.593307495117188, 13.145774841308594, 21.968093872070312, 28.75890350341797, 51.70982360839844, 1.7380218505859375, 30.951385498046875, 1.9208526611328125, 69.73886108398438, 19.24881362915039, -10.387725830078125, 33.00274658203125, 30.90630340576172, 35.01060485839844, 33.70381164550781, 27.405357360839844, 17.838829040527344, 49.458953857421875, 1.2676162719726562, 30.300155639648438, -5.191925048828125, 15.428062438964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000228.npy"} +{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 27.103031158447266, "std": 25.559297561645508, "min": -31.225284576416016, "p10": 1.4650747299194342, "median": 21.140487670898438, "p90": 59.60950546264649, "max": 102.23683166503906, "pos_frac": 0.921875, "sample": [8.830154418945312, 5.075067520141602, 17.482269287109375, 57.83171081542969, 19.975482940673828, -16.10281753540039, 7.969779968261719, 53.111175537109375, 21.12786102294922, 52.051841735839844, 78.21507263183594, 59.010704040527344, 30.598281860351562, 62.664695739746094, 38.48722839355469, 32.790382385253906, 17.578392028808594, 18.92241668701172, 20.13705825805664, 62.652008056640625, 2.1920032501220703, 22.069923400878906, 6.591747283935547, 38.16071701049805, 9.303071975708008, 18.840373992919922, 102.23683166503906, -14.2669677734375, 12.443290710449219, 48.573944091796875, 33.51830291748047, 46.32921600341797, 33.13707733154297, 58.93275451660156, 66.33869934082031, 23.196962356567383, 65.32774353027344, 16.083518981933594, 15.35528564453125, -0.2688446044921875, 36.16985321044922, 3.0402984619140625, 8.823837280273438, 23.13678550720215, 2.5920066833496094, 21.153114318847656, 52.86979675292969, 52.99947738647461, 1.153533935546875, -31.225284576416016, 20.460290908813477, 59.86613464355469, 16.0504093170166, 22.66082763671875, 44.86668395996094, 36.62818908691406, 3.9892501831054688, 47.424835205078125, 2.843048095703125, 58.04214859008789, -17.49127197265625, 0.7612552642822266, 2.7063140869140625, 12.56805419921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000229.npy"} +{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 30.37428092956543, "std": 27.951574325561523, "min": -30.670623779296875, "p10": 1.9100021362304704, "median": 25.047624588012695, "p90": 76.90207901000977, "max": 98.85956573486328, "pos_frac": 0.921875, "sample": [18.136932373046875, 62.864845275878906, 7.042497634887695, 57.144981384277344, 20.92353057861328, 23.7531681060791, 27.184659957885742, 38.24927520751953, 20.66701316833496, 11.018383026123047, 26.119617462158203, 37.747291564941406, -25.054542541503906, 35.69775390625, 41.715240478515625, 24.7413330078125, -3.7394561767578125, 31.755584716796875, 4.0427093505859375, 27.020565032958984, 16.898406982421875, -5.934709548950195, 0.38605499267578125, 29.495227813720703, 9.768848419189453, 18.717212677001953, 18.013145446777344, 41.024391174316406, 56.45806884765625, 31.511985778808594, 18.99592399597168, 14.555294036865234, 16.62738037109375, 76.00582885742188, 47.35818099975586, 3.4720458984375, 27.36842918395996, 41.81475830078125, 36.41245651245117, 87.36439514160156, -0.2540626525878906, 1.2405548095703125, -30.670623779296875, 16.82897186279297, 19.07270050048828, 21.291797637939453, 77.28618621826172, 28.411771774291992, 91.3683853149414, 54.564910888671875, 28.838367462158203, 16.50790786743164, 25.35391616821289, 5.310323715209961, 80.9263916015625, 22.655014038085938, 98.85956573486328, 14.188644409179688, 96.21284484863281, 40.015846252441406, 60.44758605957031, 4.723106384277344, 8.919998168945312, 88.50918579101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000230.npy"} +{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 26.028377532958984, "std": 25.084552764892578, "min": -6.858421325683594, "p10": 3.54089698791504, "median": 16.68770980834961, "p90": 62.88970031738282, "max": 111.73956298828125, "pos_frac": 0.9375, "sample": [111.73956298828125, 33.92093276977539, 19.502174377441406, 35.660125732421875, 8.354957580566406, 27.31232452392578, 65.06013488769531, 6.340736389160156, 15.751523971557617, 70.0824203491211, 7.757053375244141, 7.4480743408203125, 8.853141784667969, 26.795421600341797, 82.66180419921875, 15.890840530395508, 30.497947692871094, 46.801273345947266, 41.26776885986328, 42.09718322753906, 67.57093048095703, 27.49972915649414, 13.985923767089844, 54.4727783203125, 8.483675003051758, 7.381797790527344, 35.18177795410156, 0.9001197814941406, 63.606109619140625, 94.11079406738281, 61.21807861328125, 60.89402770996094, 20.817718505859375, 10.675527572631836, -6.858421325683594, 10.100852966308594, 12.236007690429688, 43.69115447998047, 8.134323120117188, 4.488189697265625, 10.136672973632812, 11.482202529907227, 23.434646606445312, 5.690887451171875, 6.401939392089844, 6.722881317138672, 30.0111083984375, 4.890565872192383, 3.1349143981933594, 18.65673828125, 45.98271179199219, 35.79540252685547, 1.3991432189941406, 8.542125701904297, -1.9079608917236328, 52.523250579833984, -3.2178688049316406, 12.56039047241211, -0.2533721923828125, 17.359710693359375, 9.08782958984375, 27.9473876953125, 21.03266143798828, 16.015708923339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000231.npy"} +{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 21.21156120300293, "std": 23.13524055480957, "min": -16.716339111328125, "p10": -6.193288421630858, "median": 17.532981872558594, "p90": 50.789568328857435, "max": 104.99148559570312, "pos_frac": 0.8125, "sample": [31.340545654296875, 31.062454223632812, 1.7677841186523438, 48.28233337402344, 3.27655029296875, 20.34667205810547, 36.440185546875, 38.86700439453125, 7.7740478515625, 57.51078796386719, -1.7901763916015625, 29.84790802001953, 38.759666442871094, -10.738121032714844, -4.824310302734375, 3.1395416259765625, 18.041473388671875, 37.383155822753906, -10.955215454101562, 15.019515991210938, 37.97840118408203, 69.13893127441406, -3.19940185546875, 31.063430786132812, 24.117095947265625, 7.516521453857422, 64.175048828125, -16.716339111328125, 10.328170776367188, 51.864097595214844, 21.85289764404297, 15.083793640136719, 10.428863525390625, 17.024490356445312, -5.0508270263671875, 32.34089660644531, -6.754646301269531, 2.076108932495117, 22.561447143554688, 104.99148559570312, 26.481201171875, -6.682914733886719, 18.384563446044922, -9.539073944091797, 14.128040313720703, 14.082704544067383, 37.05839538574219, -8.701696395874023, 36.89530944824219, -0.5399761199951172, 67.64466857910156, 8.138629913330078, 9.411026000976562, 43.455177307128906, 59.20771026611328, 31.539533615112305, 32.95642852783203, 31.66387939453125, 5.4121246337890625, 15.400177001953125, 5.5055694580078125, 17.009437561035156, 23.331436157226562, 3.9253921508789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000232.npy"} +{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 27.884883880615234, "std": 26.594215393066406, "min": -23.31106185913086, "p10": -1.431148910522458, "median": 22.731292724609375, "p90": 60.7330436706543, "max": 130.25161743164062, "pos_frac": 0.890625, "sample": [7.6144866943359375, 13.579265594482422, 60.072265625, 22.03753662109375, 14.22787857055664, 37.936363220214844, -7.617069244384766, -10.160179138183594, 6.3116607666015625, -23.31106185913086, 17.27008056640625, 33.37453079223633, 6.843055725097656, 45.957801818847656, 76.01849365234375, 71.03666687011719, 22.75896453857422, 17.23178482055664, 47.578147888183594, 59.07749938964844, 22.70362091064453, 40.85392761230469, 15.513084411621094, 60.460487365722656, 11.878425598144531, 34.509761810302734, 14.655937194824219, -10.362548828125, -7.939048767089844, 2.0049171447753906, 48.328895568847656, 40.497955322265625, 26.932113647460938, 49.57732391357422, 35.987701416015625, 58.11578369140625, 1.756561279296875, 52.26979064941406, 8.513290405273438, 25.477598190307617, 36.58088684082031, 61.548858642578125, 11.54037094116211, 28.3134765625, 18.551658630371094, 60.849853515625, 13.660053253173828, -12.891517639160156, 1.5022506713867188, 4.7104644775390625, 13.079437255859375, 25.074935913085938, 62.30387878417969, 40.04412841796875, 6.4405059814453125, 20.267257690429688, 9.864749908447266, 49.168357849121094, -2.6883201599121094, 130.25161743164062, 67.02566528320312, 15.299240112304688, 33.151702880859375, 41.409446716308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000233.npy"} +{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 28.14922332763672, "std": 22.41670799255371, "min": -35.81159973144531, "p10": 3.048023796081543, "median": 23.51495361328125, "p90": 60.15936355590822, "max": 89.17666625976562, "pos_frac": 0.9375, "sample": [-0.12805938720703125, 45.01659393310547, 53.0279541015625, 16.56536865234375, 37.77420425415039, 45.35387420654297, 30.782882690429688, 36.0564079284668, 53.65283966064453, 52.87040710449219, 15.699676513671875, 11.580001831054688, -35.81159973144531, 11.64095687866211, 13.671958923339844, 27.476234436035156, 6.213706970214844, 13.291267395019531, 19.6119384765625, 44.74420166015625, 75.23682403564453, 55.01060485839844, 8.691572189331055, 14.368003845214844, 24.247482299804688, 2.973968505859375, 43.62879943847656, 27.054046630859375, 13.196403503417969, 64.57351684570312, 3.013141632080078, 40.88860321044922, 62.36597442626953, 46.29887771606445, 89.17666625976562, 22.782424926757812, 26.86590576171875, 40.72896957397461, 18.58498764038086, 21.307662963867188, 35.97509765625, 41.788909912109375, 47.88642120361328, 15.55903434753418, 3.129415512084961, 14.87564468383789, 14.525650024414062, 2.5742568969726562, 33.234134674072266, 15.951595306396484, 15.899421691894531, 67.45762634277344, 64.2056884765625, -10.657821655273438, 21.230384826660156, 22.449462890625, 6.962715148925781, 37.44349670410156, 27.749801635742188, 19.774429321289062, 27.62107276916504, -5.7118072509765625, 63.05043029785156, 18.490020751953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000234.npy"} +{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 22.86014175415039, "std": 22.96712875366211, "min": -18.03559112548828, "p10": -1.917087554931639, "median": 16.808202743530273, "p90": 51.13296737670898, "max": 89.03157043457031, "pos_frac": 0.84375, "sample": [11.099403381347656, 28.130233764648438, 2.7014923095703125, 19.45508575439453, 29.828645706176758, 89.03157043457031, 12.226516723632812, 57.759552001953125, 19.108951568603516, 32.62538146972656, -18.03559112548828, -0.381805419921875, 7.0638885498046875, 45.859130859375, 12.44097900390625, -9.005859375, 18.879135131835938, 5.598335266113281, 28.38593292236328, -5.225563049316406, -3.1031570434570312, 8.4189453125, 11.957620620727539, -9.05517578125, 50.70594024658203, 50.684326171875, 10.3232421875, -0.152252197265625, 39.50914001464844, 52.28855895996094, 30.213302612304688, 18.880783081054688, 40.67039489746094, 13.035209655761719, 17.865985870361328, 29.54546356201172, 19.08954429626465, 47.96855926513672, 30.555076599121094, 28.823333740234375, 14.848159790039062, 43.33160400390625, 67.67401123046875, 10.211273193359375, 50.1212158203125, 35.80609130859375, 46.209251403808594, -2.5750656127929688, 4.881034851074219, 10.385841369628906, 75.83930969238281, 51.31597900390625, 2.6906166076660156, -0.1637420654296875, 10.659000396728516, -4.958616256713867, 5.4644317626953125, 15.750419616699219, 13.000091552734375, 5.533315658569336, 24.164337158203125, 15.738666534423828, 83.18130493164062, 8.170295715332031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000235.npy"} +{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 25.48858070373535, "std": 23.67022132873535, "min": -16.89471435546875, "p10": -2.0655002593994136, "median": 22.449556350708008, "p90": 63.003779602050784, "max": 81.48548889160156, "pos_frac": 0.859375, "sample": [64.44183349609375, 3.6033554077148438, -8.08538818359375, 36.358787536621094, 31.20827865600586, 28.72631072998047, 42.2591552734375, 7.81031608581543, 21.603702545166016, 17.970138549804688, 27.85357666015625, 76.50970458984375, 18.167741775512695, 37.901084899902344, 23.907020568847656, 35.52124786376953, 16.273611068725586, 23.29541015625, -16.89471435546875, 81.48548889160156, 15.977760314941406, -5.5594635009765625, 3.085531234741211, 67.71884155273438, 44.18745803833008, 5.0084686279296875, 10.733436584472656, 72.853515625, 42.656524658203125, 9.303976058959961, 34.32780456542969, 8.486246109008789, -4.5068359375, 35.638031005859375, 21.18924903869629, 30.394115447998047, -14.19903564453125, 64.91039276123047, 29.05347442626953, 10.102785110473633, 23.35205078125, 11.102226257324219, 13.520029067993164, 1.7263946533203125, -0.6653804779052734, 62.87004089355469, 20.217754364013672, 7.9239044189453125, 3.984180450439453, 11.094562530517578, 41.90711975097656, 61.81739807128906, 51.47492980957031, -3.124736785888672, 47.98052978515625, 29.40518569946289, 63.06109619140625, 39.852867126464844, 0.7748031616210938, 15.01397705078125, -1.798431396484375, 31.753677368164062, -2.1799583435058594, 48.92604064941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000236.npy"} +{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 27.525650024414062, "std": 24.088449478149414, "min": -30.515464782714844, "p10": 1.8745109558105468, "median": 24.824689865112305, "p90": 65.76077270507814, "max": 83.3187026977539, "pos_frac": 0.90625, "sample": [24.154342651367188, 24.814258575439453, 76.11864471435547, 55.62853240966797, 1.8664932250976562, 38.45171356201172, 26.684890747070312, 28.698341369628906, 9.160449981689453, 18.20782470703125, 9.748199462890625, 21.01192855834961, 63.00428771972656, 18.072921752929688, 66.94212341308594, 30.35034942626953, 8.53164291381836, 42.75419616699219, 1.893218994140625, 43.031883239746094, 44.91917419433594, 5.323173522949219, 77.12274169921875, 72.34768676757812, -2.816009521484375, 83.3187026977539, 11.164382934570312, 11.717765808105469, 29.24073028564453, 10.196479797363281, 71.29655456542969, 20.517181396484375, 34.07734680175781, 43.21321105957031, 20.124465942382812, 26.794113159179688, 15.381584167480469, 30.14165496826172, 24.835121154785156, 14.950607299804688, 48.404083251953125, -5.611228942871094, 45.92387390136719, 33.08306884765625, 71.44279479980469, 3.968597412109375, -0.6487274169921875, 35.278480529785156, -30.515464782714844, 52.647369384765625, 10.642454147338867, 17.221200942993164, -10.676559448242188, -5.7055206298828125, 29.590049743652344, 58.244354248046875, 4.667757034301758, 40.78773498535156, 15.192756652832031, 14.717456817626953, 10.165245056152344, 39.46057891845703, 1.9578399658203125, 28.410362243652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000237.npy"} +{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 24.64088249206543, "std": 20.25897979736328, "min": -12.141304016113281, "p10": 2.586801910400391, "median": 21.2393856048584, "p90": 49.528165435791024, "max": 82.74313354492188, "pos_frac": 0.953125, "sample": [6.5242919921875, 6.615875244140625, 5.243133544921875, 41.96393966674805, 12.954574584960938, 11.708038330078125, 25.36248779296875, 27.7947998046875, 12.372346878051758, 43.895111083984375, 17.709688186645508, 47.93565368652344, 17.021820068359375, 4.818294525146484, 31.422040939331055, 34.613433837890625, 36.604736328125, 2.3336563110351562, 25.975025177001953, 12.522941589355469, -3.991252899169922, 69.90716552734375, 18.016983032226562, 42.77014923095703, 0.4992656707763672, 82.74313354492188, 12.666763305664062, 24.292137145996094, 65.09771728515625, 1.167572021484375, 25.816314697265625, 10.88671875, 14.177154541015625, 30.42534637451172, 41.1195068359375, 7.686895370483398, 21.043712615966797, 27.368759155273438, 68.7672119140625, 29.635169982910156, 10.147510528564453, 16.354156494140625, 21.931991577148438, -12.141304016113281, 16.02307891845703, 3.91705322265625, 1.3464221954345703, 26.77386474609375, 32.56279754638672, 73.5896987915039, 30.90398406982422, 17.556114196777344, 21.43505859375, 46.05933380126953, -6.863838195800781, 65.49168395996094, 50.210670471191406, 23.9954776763916, 19.59625244140625, 3.1774749755859375, 35.47943115234375, 26.53548812866211, 20.914337158203125, 16.53139305114746], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000238.npy"} +{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 25.54654312133789, "std": 21.156612396240234, "min": -15.41408920288086, "p10": 3.7882785797119145, "median": 22.45859146118164, "p90": 55.316453552246095, "max": 80.93455505371094, "pos_frac": 0.9375, "sample": [69.46001434326172, 23.39801025390625, 17.539813995361328, 32.19415283203125, 15.06821060180664, 25.82177734375, 28.641952514648438, 24.51303482055664, 4.101757049560547, 4.284599304199219, 23.008220672607422, 19.98382568359375, 7.467266082763672, 31.469772338867188, 10.254568099975586, 3.038705825805664, 52.40208435058594, 17.164451599121094, 24.067184448242188, 52.630123138427734, 49.00010681152344, 14.064483642578125, 32.87495422363281, -15.41408920288086, 13.549064636230469, 13.183588027954102, 37.38715362548828, 59.32099914550781, 24.210914611816406, 22.5538330078125, 7.823732376098633, 17.25445556640625, 55.172332763671875, 76.83549499511719, 1.9246978759765625, 22.36334991455078, 15.309898376464844, 79.4695816040039, 13.576858520507812, 44.511627197265625, 29.808589935302734, 55.37821960449219, 15.418136596679688, 31.82769775390625, -3.52972412109375, 12.036903381347656, -9.6553955078125, 3.6539306640625, -9.424003601074219, 30.396137237548828, 46.439491271972656, 28.848007202148438, 30.490203857421875, 16.005115509033203, 80.93455505371094, 25.444541931152344, 62.80955505371094, 12.558414459228516, 17.487293243408203, 30.04884910583496, 19.205291748046875, 7.259243011474609, 17.961524963378906, 12.093671798706055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000239.npy"} +{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 22.555252075195312, "std": 21.10706901550293, "min": -29.489578247070312, "p10": -2.7541986465454102, "median": 19.93775463104248, "p90": 53.512052917480474, "max": 64.00074768066406, "pos_frac": 0.84375, "sample": [19.018327713012695, 63.67582321166992, 18.01873016357422, 11.037796020507812, 25.881256103515625, 51.83033752441406, 3.904327392578125, 58.278961181640625, 4.225502014160156, -4.167816162109375, 21.32413101196289, 15.38494873046875, -2.535919189453125, 34.32818603515625, 49.17444610595703, 18.168458938598633, -2.7955970764160156, 13.383140563964844, 4.615196228027344, 48.842185974121094, 18.31085968017578, 63.86137390136719, 12.267570495605469, 38.98167419433594, 20.963340759277344, 13.286849975585938, -14.478706359863281, 6.027313232421875, 14.9608154296875, 64.00074768066406, 30.275482177734375, 36.10211181640625, 33.86994171142578, 24.174259185791016, -11.45660400390625, 44.64181137084961, 36.123992919921875, 20.99701690673828, -10.472373962402344, 24.405597686767578, 3.487041473388672, 41.84297561645508, -3.6214141845703125, 17.424983978271484, 41.686500549316406, -1.2905197143554688, 41.509132385253906, -2.657602310180664, 54.2327880859375, 30.560623168945312, 18.730144500732422, 59.09283447265625, 11.382293701171875, 22.598106384277344, 2.9239578247070312, 27.91008949279785, 27.929935455322266, 19.5554256439209, 62.57630920410156, 29.859298706054688, 9.751411437988281, 20.320083618164062, 18.78583526611328, -29.489578247070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000240.npy"} +{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 31.69961929321289, "std": 31.25377655029297, "min": -24.563827514648438, "p10": -0.5832557678222654, "median": 27.302894592285156, "p90": 76.5036720275879, "max": 160.10690307617188, "pos_frac": 0.875, "sample": [21.63387680053711, 36.48139953613281, -7.040153503417969, 12.418645858764648, -0.35833740234375, 32.45329284667969, 53.536705017089844, 57.76475524902344, 39.315452575683594, 39.8052978515625, 48.74382400512695, 14.53961181640625, 2.879131317138672, 86.20700073242188, -5.582446098327637, 33.7515754699707, 8.428413391113281, 49.8741455078125, 52.34680938720703, 38.111854553222656, 26.79290771484375, 29.768089294433594, 11.306711196899414, 18.084625244140625, 160.10690307617188, 37.92579650878906, 21.64678955078125, 25.34239959716797, 80.63618469238281, -0.6796493530273438, 98.38102722167969, 27.051849365234375, 73.33650207519531, 18.02902603149414, 27.553939819335938, 4.815338134765625, 17.047584533691406, 33.37092590332031, 15.086906433105469, 30.685577392578125, 28.28771209716797, 19.478511810302734, 8.646368026733398, 12.642669677734375, 13.749885559082031, -5.548187255859375, 29.851829528808594, -3.2833824157714844, 39.36237335205078, 36.40995788574219, 17.806365966796875, 20.944625854492188, 16.15289306640625, 82.01509857177734, 48.674591064453125, 28.776649475097656, -22.428497314453125, 51.73851013183594, 66.56495666503906, 77.86103057861328, -24.563827514648438, 97.50470733642578, 9.268424987792969, 7.2621002197265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000241.npy"} +{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 24.644126892089844, "std": 21.845211029052734, "min": -19.23004150390625, "p10": 1.6598030090332034, "median": 19.669065475463867, "p90": 53.11238708496095, "max": 99.436279296875, "pos_frac": 0.9375, "sample": [-3.9956188201904297, 14.010135650634766, 9.536796569824219, 47.06852722167969, 19.614192962646484, -19.23004150390625, 12.618453979492188, 7.192771911621094, 50.717437744140625, 16.634124755859375, 5.105546951293945, 6.5867919921875, 74.5836181640625, 29.547422409057617, 23.12195587158203, 11.515266418457031, 16.17626190185547, 33.962425231933594, 62.90423583984375, 6.822969436645508, 33.91378402709961, 6.122184753417969, 35.08501434326172, 25.18645477294922, 40.09596252441406, 17.16021728515625, 42.265602111816406, 16.34326171875, 17.562299728393555, -10.775634765625, 21.407333374023438, 37.786338806152344, 19.72393798828125, -6.202392578125, 37.46678161621094, 19.181251525878906, 54.1387939453125, 99.436279296875, 16.15007781982422, 2.985870361328125, 1.8648757934570312, 24.268829345703125, 18.490982055664062, 71.11398315429688, 16.42278289794922, 4.64990234375, 49.360191345214844, 19.033729553222656, 35.38099670410156, 24.646974563598633, 0.425628662109375, 66.96372985839844, 15.220317840576172, 1.4953460693359375, 36.597625732421875, 1.5719146728515625, 28.77918243408203, 20.256839752197266, 28.924423217773438, 22.607986450195312, 7.813016891479492, 55.76123046875, 48.76792907714844, 27.279014587402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000242.npy"} +{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 29.49078941345215, "std": 21.84927749633789, "min": -8.895929336547852, "p10": 4.926291847229005, "median": 25.51095962524414, "p90": 61.300480270385755, "max": 101.5250015258789, "pos_frac": 0.96875, "sample": [21.834228515625, 3.2497787475585938, 22.279109954833984, 55.84269714355469, 44.048988342285156, 37.434471130371094, 13.840755462646484, 37.264434814453125, -8.895929336547852, 6.0708160400390625, 3.789947509765625, 13.482757568359375, 66.67047119140625, 11.534208297729492, 14.505332946777344, 25.86804962158203, 25.15386962890625, 67.7083740234375, 9.068916320800781, 13.172683715820312, 1.691253662109375, 2.9955806732177734, 26.786174774169922, 28.07476234436035, 4.435781478881836, 29.543411254882812, 22.74528694152832, 17.104080200195312, 26.683334350585938, 30.74774169921875, 46.81635284423828, 52.61351776123047, 7.635950088500977, 51.86601257324219, 9.907913208007812, 85.46104431152344, 47.83876037597656, 42.80958557128906, 30.416610717773438, 43.77766418457031, 58.14552688598633, 23.009239196777344, 101.5250015258789, 72.4244155883789, 14.937171936035156, 36.03935241699219, 18.454330444335938, 12.840259552001953, 62.65260314941406, -1.1430530548095703, 29.99962043762207, 28.781822204589844, 32.094024658203125, 18.707275390625, 66.42369079589844, 17.441471099853516, 40.94938659667969, 16.255706787109375, 15.606880187988281, 36.420623779296875, 23.56036376953125, 29.78295135498047, 24.628149032592773, 13.998970031738281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000243.npy"} +{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 25.319198608398438, "std": 21.50766372680664, "min": -12.607946395874023, "p10": 0.47381610870361346, "median": 21.22218894958496, "p90": 58.764900970458996, "max": 75.14773559570312, "pos_frac": 0.921875, "sample": [60.35028076171875, 4.850563049316406, 10.97793197631836, 21.31737518310547, 9.436813354492188, 8.037002563476562, 60.46289825439453, 35.12607192993164, -4.94805908203125, 14.145530700683594, 26.487995147705078, 32.27848815917969, 7.192535400390625, 30.48766326904297, 17.22021484375, 32.93928909301758, 0.6754322052001953, 75.14773559570312, 13.952568054199219, 18.583173751831055, -7.5303802490234375, 27.882322311401367, 23.976852416992188, -12.607946395874023, 55.06568145751953, 7.720220565795898, 16.238677978515625, 40.63072204589844, 18.844314575195312, -1.6609573364257812, 45.45344543457031, 6.542585372924805, 43.77742004394531, 52.668052673339844, 64.40045166015625, 46.18095397949219, 46.715599060058594, 69.02058410644531, 3.7824325561523438, 49.006256103515625, 25.17865753173828, -6.264631271362305, 61.624420166015625, 0.088409423828125, 36.05519104003906, 0.3874092102050781, 28.90513801574707, 11.826444625854492, 7.827484130859375, 20.703662872314453, 21.127002716064453, 19.317298889160156, 1.9564838409423828, 25.632150650024414, 9.380142211914062, 31.761219024658203, 30.0599365234375, 65.12542724609375, 1.3599510192871094, 34.796173095703125, 12.389892578125, 46.445648193359375, 10.361717224121094, 53.5546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000244.npy"} +{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 26.90045928955078, "std": 27.2540225982666, "min": -20.270065307617188, "p10": 2.0435630798339854, "median": 19.41887092590332, "p90": 52.18039093017579, "max": 115.78179931640625, "pos_frac": 0.9375, "sample": [7.845802307128906, 50.24717712402344, 49.78566360473633, 41.59670639038086, 20.13528060913086, 17.283893585205078, 17.10883903503418, 0.804229736328125, 18.18536376953125, 18.70246124267578, 12.856170654296875, 20.385597229003906, 46.050682067871094, 25.760997772216797, 3.7159976959228516, 33.49034881591797, 42.80079650878906, 45.07405090332031, 11.059432983398438, 7.1432647705078125, 115.78179931640625, 20.742462158203125, -5.605720520019531, 47.45118713378906, 2.95361328125, 13.033401489257812, 23.45366668701172, -13.682758331298828, 5.741186141967773, 4.957405090332031, 46.27226257324219, 33.77058410644531, 101.12528991699219, 5.6642913818359375, 44.771766662597656, 12.892105102539062, 3.6472625732421875, 13.507741928100586, 33.974098205566406, 53.0089111328125, 4.159889221191406, 9.302619934082031, 43.6778564453125, 17.531646728515625, 34.01885223388672, -20.270065307617188, 47.048065185546875, 73.43207550048828, 23.814308166503906, 46.19651794433594, 71.96156311035156, 7.708732604980469, 85.76377868652344, 7.600778579711914, 0.8817176818847656, 98.78688049316406, 24.467844009399414, 40.62062072753906, 6.186367034912109, -1.2692031860351562, 29.26944351196289, 8.95892333984375, 6.633365631103516, 1.6535415649414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000245.npy"} +{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 23.103233337402344, "std": 21.114534378051758, "min": -16.053936004638672, "p10": 1.90443458557129, "median": 17.323326110839844, "p90": 55.52792663574219, "max": 80.6675033569336, "pos_frac": 0.921875, "sample": [29.299819946289062, 67.36518096923828, 28.195159912109375, 11.754974365234375, 1.1007080078125, 34.63203048706055, 80.6675033569336, -3.368377685546875, 25.587139129638672, -5.354545593261719, 6.910499572753906, 24.533058166503906, 23.276992797851562, 6.592798233032227, 30.020692825317383, 38.419654846191406, 54.47785186767578, 4.291744232177734, 7.4782867431640625, 25.93682861328125, 21.299358367919922, 52.43042755126953, 32.41291427612305, 17.05541229248047, 33.468055725097656, 4.4635772705078125, 34.67985153198242, 9.987060546875, 62.567413330078125, -2.929201126098633, 3.378438949584961, 27.038253784179688, 16.308446884155273, 10.810073852539062, 55.97795867919922, 10.271415710449219, 14.060684204101562, 5.834621429443359, 17.27104949951172, 8.517555236816406, 58.257781982421875, 41.18647766113281, 8.531242370605469, 37.0917854309082, 13.630090713500977, 44.40119934082031, 22.97342300415039, 2.9098968505859375, 2.9803848266601562, 13.547794342041016, -16.053936004638672, 17.37560272216797, 56.35716247558594, 10.050949096679688, 15.669631958007812, 19.50726890563965, 1.4735221862792969, 78.26365661621094, 31.563720703125, 48.18144226074219, 15.238960266113281, 33.51525115966797, -7.7497406005859375, 2.982074737548828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000246.npy"} +{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 32.50457000732422, "std": 25.603313446044922, "min": -19.49005126953125, "p10": 5.508189582824707, "median": 30.655773162841797, "p90": 64.82264251708985, "max": 103.99567413330078, "pos_frac": 0.921875, "sample": [5.194570541381836, 6.492544174194336, 23.259719848632812, 64.96803283691406, 32.50465393066406, 73.7529296875, 64.4833984375, 36.12696838378906, 16.879899978637695, 41.80049514770508, 78.2312240600586, 52.779266357421875, 35.347259521484375, 61.54307556152344, 13.10883903503418, 66.35201263427734, 80.34585571289062, 12.292415618896484, 49.34739685058594, 103.99567413330078, 27.185691833496094, -4.456853866577148, 32.322967529296875, 9.336830139160156, 8.908056259155273, 38.0670166015625, 3.2025146484375, 18.308856964111328, 54.1087646484375, 23.0177001953125, 51.27685546875, 51.500701904296875, 33.353904724121094, 29.39977264404297, 24.450790405273438, 44.516998291015625, 40.398101806640625, 29.993133544921875, 33.28923797607422, -11.351852416992188, 8.611675262451172, 31.31841278076172, 6.239967346191406, 8.409137725830078, 13.633218765258789, 13.680805206298828, 17.42874526977539, 36.001686096191406, 57.615997314453125, 40.03363800048828, 8.288002014160156, 8.420173645019531, 20.484352111816406, 95.22830200195312, 28.609771728515625, -3.3704490661621094, 28.07392120361328, 43.352325439453125, 59.10719680786133, -19.49005126953125, 54.54170227050781, -4.965217590332031, 16.760269165039062, 56.6434326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000247.npy"} +{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 24.356069564819336, "std": 19.23394203186035, "min": -5.182277679443359, "p10": 3.9992078781127933, "median": 17.58814525604248, "p90": 56.45011711120606, "max": 77.38822174072266, "pos_frac": 0.96875, "sample": [32.623470306396484, 48.373291015625, 8.649166107177734, 16.159259796142578, 36.41210174560547, 0.8697834014892578, 10.2210693359375, 37.43410110473633, 3.444244384765625, 14.321258544921875, 7.698127746582031, 66.31998443603516, 15.627212524414062, 18.912267684936523, 42.178165435791016, 4.129663467407227, 12.504117965698242, 4.967519760131836, 30.437986373901367, 22.63461685180664, 14.532760620117188, 14.777801513671875, 17.741657257080078, 59.705352783203125, 12.4254150390625, 23.18595314025879, 35.215087890625, 40.95664978027344, 38.892051696777344, 60.66162109375, 18.962158203125, 13.452316284179688, 16.279563903808594, 31.0841064453125, 20.855085372924805, 52.79646301269531, 0.8837127685546875, 9.531166076660156, 17.716564178466797, 13.616153717041016, 16.08721923828125, 54.741092681884766, 31.964820861816406, 77.38822174072266, 24.93427276611328, 17.459726333618164, 62.541465759277344, 49.79020690917969, 19.81389617919922, 15.701393127441406, 0.06910324096679688, 4.406049728393555, 58.33575439453125, 31.690277099609375, 57.18255615234375, 30.90558624267578, 6.710906982421875, 13.407615661621094, 13.346733093261719, 10.737510681152344, 3.94329833984375, 15.70833969116211, -5.182277679443359, -0.08432579040527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000248.npy"} +{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 20.92279624938965, "std": 20.92414665222168, "min": -13.514167785644531, "p10": -3.8161849975585915, "median": 18.073017120361328, "p90": 51.985609436035155, "max": 74.16664123535156, "pos_frac": 0.828125, "sample": [3.4135875701904297, 37.286834716796875, 20.67205047607422, 14.483373641967773, 9.898826599121094, -4.803123474121094, 5.94207763671875, 16.36376953125, 25.755203247070312, 25.677093505859375, 24.471176147460938, 13.604522705078125, 7.225370407104492, 32.51994323730469, 4.4287109375, -13.514167785644531, 7.787784576416016, 51.484588623046875, 25.419883728027344, -1.5133285522460938, 19.0323486328125, 35.141605377197266, 5.025241851806641, 31.596282958984375, 1.3579864501953125, 45.148094177246094, 1.5603809356689453, 48.57450866699219, 52.20033264160156, 48.95615768432617, -5.461345672607422, 24.75326919555664, 17.113685607910156, 16.32898712158203, 13.654569625854492, 24.529518127441406, 26.675514221191406, -5.349220275878906, 30.113018035888672, 54.316978454589844, 40.54949951171875, -0.176727294921875, 74.16664123535156, -10.126754760742188, -5.4258575439453125, -0.22603225708007812, 30.58839225769043, 3.0184478759765625, 21.282142639160156, 9.206024169921875, 54.019126892089844, 35.44312286376953, 2.6375808715820312, 63.01483154296875, 50.652137756347656, 55.11894989013672, 6.1932525634765625, -13.3912353515625, 11.110458374023438, 20.1824951171875, -0.26505279541015625, 55.716217041015625, 41.08235168457031, 2.8168678283691406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000249.npy"} +{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 25.81413459777832, "std": 18.444185256958008, "min": -20.155029296875, "p10": 3.6458049774169923, "median": 26.776209831237793, "p90": 46.64247131347656, "max": 72.4102783203125, "pos_frac": 0.9375, "sample": [42.470436096191406, 26.379209518432617, 12.591377258300781, -13.445613861083984, -20.155029296875, 39.34693908691406, 6.256385803222656, 40.93983459472656, 24.361841201782227, 29.760330200195312, 22.637371063232422, 30.16351318359375, 17.063369750976562, 36.46281433105469, 10.488197326660156, 25.809715270996094, 11.996665954589844, 23.4354248046875, 45.93380355834961, 33.987823486328125, 32.784149169921875, -4.23822021484375, 3.8048019409179688, 45.091209411621094, 4.716636657714844, 39.874244689941406, 19.99022674560547, 10.360414505004883, 28.13178253173828, 34.30706787109375, 22.74357032775879, 3.5776634216308594, 5.213035583496094, 64.00879669189453, -5.511020660400391, 49.83563232421875, 44.590576171875, 24.021240234375, 2.4156417846679688, 24.02104949951172, 29.28668212890625, 51.55860900878906, 32.778900146484375, 33.89141845703125, 48.684051513671875, 21.95631980895996, 32.28173065185547, 30.4110107421875, 27.17321014404297, 17.539794921875, 46.94618606567383, 12.115184783935547, 15.264425277709961, 45.55564880371094, 6.760383605957031, 30.86779022216797, 72.4102783203125, 65.36186218261719, 33.68883514404297, 34.43141174316406, 22.432815551757812, 0.275604248046875, 5.797698974609375, 38.41188049316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000250.npy"} +{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 25.741519927978516, "std": 20.797607421875, "min": -5.296670913696289, "p10": -0.7325302124023417, "median": 24.485797882080078, "p90": 55.7824691772461, "max": 72.34446716308594, "pos_frac": 0.890625, "sample": [54.15087127685547, 4.221488952636719, 68.98190307617188, 40.17266845703125, 42.59912872314453, 5.8397369384765625, 10.365386962890625, 56.652923583984375, 45.360877990722656, 26.287141799926758, 56.387115478515625, 1.2410049438476562, 36.51303482055664, 6.912609100341797, 27.359119415283203, -3.844573974609375, -1.5783309936523438, 72.34446716308594, 17.54090690612793, 33.75366973876953, 11.180789947509766, 34.421051025390625, 10.476276397705078, 48.71381378173828, 8.265037536621094, 13.596939086914062, -2.4530277252197266, -5.296670913696289, 30.956512451171875, 67.21954345703125, 10.341934204101562, -2.5866775512695312, 44.09906005859375, 13.706043243408203, 2.9639530181884766, 39.32927703857422, 3.509960174560547, 14.755645751953125, 24.684120178222656, 44.614620208740234, 24.2874755859375, 33.4290885925293, 16.26186180114746, 48.15667724609375, 4.2627105712890625, 57.26023864746094, 19.51080322265625, 62.472373962402344, 23.19458770751953, 26.20807647705078, -2.1134281158447266, 10.193592071533203, 48.76600646972656, 36.93717575073242, 54.37162780761719, 24.72757339477539, 5.971576690673828, 43.508544921875, 20.203123092651367, -1.8891334533691406, 18.04259490966797, 26.2840576171875, 6.354427337646484, 27.296371459960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000251.npy"} +{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 25.683515548706055, "std": 19.699735641479492, "min": -22.630157470703125, "p10": 2.879988098144531, "median": 23.110946655273438, "p90": 53.72540359497071, "max": 81.23387145996094, "pos_frac": 0.9375, "sample": [53.05427551269531, 37.78865051269531, 17.51276397705078, 55.763580322265625, 23.959991455078125, 6.175811767578125, 22.15534210205078, 29.688323974609375, 15.314628601074219, 53.98420715332031, 25.058151245117188, 2.9883270263671875, 41.20646667480469, 4.5748748779296875, 21.040489196777344, 41.329002380371094, 14.720165252685547, 20.592430114746094, 35.80188751220703, 19.155845642089844, 32.718223571777344, 54.49519348144531, 27.266300201416016, 15.559741973876953, 39.160308837890625, -0.8323707580566406, -4.022520065307617, 2.83355712890625, 12.400482177734375, 26.83991241455078, 3.5040664672851562, 28.673095703125, 53.12152862548828, 22.724143981933594, 33.67431640625, 6.9061737060546875, 36.73306655883789, 28.741230010986328, 22.635597229003906, 71.89602661132812, 7.747831344604492, 1.5472488403320312, 35.77861022949219, 21.397048950195312, 32.15138244628906, 81.23387145996094, 55.96278762817383, 6.065940856933594, 20.027462005615234, 44.27493667602539, 16.973663330078125, -22.630157470703125, 28.749496459960938, 38.167213439941406, 15.76849365234375, 20.720447540283203, 0.41178226470947266, 23.49774932861328, 72.72721862792969, 20.077632904052734, 32.491695404052734, -3.3478012084960938, 12.456794738769531, 24.600379943847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000252.npy"} +{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 24.096176147460938, "std": 23.356128692626953, "min": -29.50146484375, "p10": -0.6898841857910151, "median": 21.87054443359375, "p90": 56.18104324340821, "max": 90.86932373046875, "pos_frac": 0.875, "sample": [12.749715805053711, 21.267295837402344, 35.99620056152344, 30.257118225097656, 5.911949157714844, 9.540092468261719, 56.826927185058594, 18.381858825683594, 53.22773742675781, 13.26148796081543, 23.767669677734375, 62.665618896484375, 47.96379089355469, 3.3196029663085938, 35.44352722167969, 21.04498291015625, 51.16481018066406, 12.803985595703125, 14.537332534790039, 25.604827880859375, -12.397611618041992, -29.50146484375, 31.00152587890625, 42.40321350097656, 43.186737060546875, 4.686676025390625, 90.86932373046875, -0.9186859130859375, 2.7916412353515625, 5.422374725341797, 4.918373107910156, 26.910097122192383, 27.420818328857422, 32.037811279296875, 54.673980712890625, 66.99911499023438, 8.443840026855469, -2.4990711212158203, 68.31767272949219, 22.473793029785156, 14.128837585449219, 36.31794738769531, 35.827476501464844, 58.15447998046875, 39.85235595703125, 7.483917236328125, 35.91259765625, 42.23469543457031, -0.15601348876953125, -5.070077896118164, 12.097038269042969, 22.501625061035156, -21.05364990234375, 3.932373046875, 1.8040313720703125, 14.57760238647461, 30.002960205078125, 33.95207214355469, 67.33567810058594, 8.826446533203125, -1.4004707336425781, 0.65155029296875, 44.308555603027344, 16.956512451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000253.npy"} +{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 24.636829376220703, "std": 22.709455490112305, "min": -13.478515625, "p10": -0.7494081497192374, "median": 20.53754425048828, "p90": 51.56536941528321, "max": 87.48858642578125, "pos_frac": 0.890625, "sample": [31.6702880859375, 1.2179832458496094, 33.189735412597656, 12.825475692749023, 40.64435577392578, 35.60084915161133, 5.2562713623046875, -1.8152332305908203, 2.3504486083984375, 15.590213775634766, 16.713890075683594, 55.83097839355469, 51.295440673828125, 22.326566696166992, 0.5062465667724609, -11.431133270263672, 24.769882202148438, 14.799495697021484, 31.012725830078125, 39.48866271972656, 2.308483123779297, 50.14567565917969, 84.2740478515625, 51.681053161621094, 44.8028564453125, 1.8476409912109375, 36.2259521484375, 49.61610412597656, 0.11711502075195312, 12.312623977661133, 12.720605850219727, 50.71751403808594, 21.06438636779785, -13.478515625, 3.4732818603515625, 49.82978820800781, 10.795135498046875, 59.99577331542969, 16.14529800415039, 36.6250114440918, 7.7290802001953125, -12.144012451171875, 21.415922164916992, 57.73969268798828, -1.1207752227783203, 39.479270935058594, 2.7476139068603516, 34.591033935546875, 12.242300033569336, 19.85259246826172, 48.098419189453125, 20.455062866210938, 20.620025634765625, 15.885360717773438, -5.5195465087890625, 87.48858642578125, 18.47625732421875, 50.4166259765625, 21.74469757080078, -1.4968719482421875, 8.196609497070312, 63.719642639160156, 30.338096618652344, 12.738395690917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000254.npy"} +{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 27.34168243408203, "std": 28.56133460998535, "min": -56.669097900390625, "p10": 0.02091140747070408, "median": 21.643768310546875, "p90": 69.71080474853518, "max": 101.58807373046875, "pos_frac": 0.890625, "sample": [101.58807373046875, 21.220123291015625, 8.132858276367188, 11.530044555664062, 24.422821044921875, 24.470993041992188, 12.340787887573242, 5.7871246337890625, -56.669097900390625, 16.004741668701172, 42.47669219970703, 17.050437927246094, 8.20465087890625, 8.18979263305664, 45.11675262451172, 26.562694549560547, -15.344772338867188, -11.260406494140625, 22.067413330078125, 57.43782424926758, 51.86785888671875, 12.979616165161133, 26.736572265625, 11.177181243896484, 34.60902786254883, 3.114715576171875, -0.3937950134277344, 85.66970825195312, 8.690452575683594, 34.43025207519531, 16.408096313476562, 39.83635330200195, 27.86951446533203, 72.63481140136719, 79.31295776367188, 43.69972229003906, 56.2266960144043, 34.37335205078125, 45.89225769042969, 92.89071655273438, 3.2316741943359375, -8.875768661499023, 36.83711624145508, 33.866085052490234, 16.327686309814453, 23.698036193847656, -0.3812408447265625, 13.89706802368164, 4.422698974609375, 78.01765441894531, 83.0201416015625, 15.063352584838867, 0.9592666625976562, 9.060096740722656, 6.363578796386719, 18.9818115234375, 41.33794403076172, 44.82389831542969, 15.440208435058594, 32.93589782714844, -6.294639587402344, 14.924592971801758, 61.966835021972656, 62.88812255859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000255.npy"} +{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 27.345840454101562, "std": 25.15676498413086, "min": -33.968597412109375, "p10": 2.101638412475586, "median": 20.493896484375, "p90": 57.833874511718754, "max": 106.14176940917969, "pos_frac": 0.921875, "sample": [43.83059310913086, 106.14176940917969, 16.336532592773438, 41.00764465332031, 32.90582275390625, 56.27587890625, 13.654993057250977, 14.043136596679688, -8.476582527160645, 23.774391174316406, 17.67910385131836, 72.48108673095703, 44.260101318359375, 44.55914306640625, 43.805484771728516, -3.7725677490234375, 18.591068267822266, 38.15726852416992, -4.276924133300781, 6.162702560424805, 16.507919311523438, 19.016143798828125, 4.534053802490234, 38.634765625, 26.984729766845703, 40.223876953125, 17.388214111328125, 2.1117935180664062, 10.20857048034668, 21.85472869873047, 53.94915771484375, 2.9711990356445312, 17.57332992553711, 38.03654861450195, 0.47171783447265625, 42.355323791503906, 2.1177444458007812, 13.296478271484375, 28.053329467773438, 15.446762084960938, -33.968597412109375, 21.786468505859375, 6.4462127685546875, 19.090805053710938, 16.0484619140625, 2.0972862243652344, 14.258323669433594, 41.931365966796875, 10.17205810546875, 36.354164123535156, 10.255821228027344, 37.96385955810547, 23.330726623535156, 58.5015869140625, 60.50456237792969, 94.40005493164062, 19.201324462890625, 10.438911437988281, 79.53279113769531, 54.069427490234375, 31.92560577392578, 30.56088638305664, -2.027555465698242, 78.38211059570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000256.npy"} +{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 30.103281021118164, "std": 26.88237190246582, "min": -12.153640747070312, "p10": -1.5420700073242186, "median": 29.766724586486816, "p90": 69.0323471069336, "max": 106.3148193359375, "pos_frac": 0.875, "sample": [55.23321533203125, 3.658039093017578, 33.46302032470703, 34.57613754272461, 69.8831558227539, 4.377128601074219, 51.94959259033203, 58.02781677246094, 36.743953704833984, 26.355178833007812, -7.808111190795898, 2.036346435546875, 14.055397033691406, 65.02925109863281, 35.74585723876953, 8.977394104003906, 38.47465515136719, 34.88279724121094, 3.1442604064941406, 81.18411254882812, 23.344627380371094, 78.39363098144531, 69.45832824707031, -9.045671463012695, 33.86501693725586, 33.249393463134766, 16.831165313720703, -5.9580230712890625, 3.6340866088867188, 106.3148193359375, 55.68340301513672, 33.55784606933594, 52.54613494873047, 78.69622802734375, 33.280494689941406, 4.07861328125, 30.41636085510254, 5.598611831665039, 40.681793212890625, -1.5532302856445312, 10.805221557617188, 55.61761474609375, -7.530124664306641, 58.257076263427734, 8.585289001464844, 29.117088317871094, 21.06316375732422, 20.663955688476562, 68.03839111328125, 52.657920837402344, 28.671783447265625, 33.98719024658203, 16.651512145996094, 36.66790008544922, 20.47283172607422, 81.31558227539062, 12.212738037109375, -2.565929412841797, 10.646873474121094, -12.153640747070312, -1.5160293579101562, 7.025672912597656, 9.308425903320312, 35.546630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000257.npy"} +{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 26.426227569580078, "std": 27.432861328125, "min": -44.445587158203125, "p10": 0.47082748413085984, "median": 24.691619873046875, "p90": 62.84750442504883, "max": 97.5058364868164, "pos_frac": 0.90625, "sample": [47.51805114746094, 5.808498382568359, 4.057018280029297, 46.875244140625, 15.927230834960938, 35.142311096191406, 77.67838287353516, -18.676422119140625, 15.44268798828125, 4.401329040527344, -3.276639938354492, 0.28170013427734375, 34.09844207763672, 30.57537078857422, 25.915468215942383, 0.9121246337890625, 5.4476318359375, 8.553642272949219, 51.99922180175781, 12.696495056152344, 4.4294891357421875, 71.35486602783203, 71.81521606445312, 15.483078002929688, 55.963653564453125, 40.17292785644531, 42.52911376953125, 12.009603500366211, 14.170692443847656, 64.9633560180664, 35.62012481689453, 29.964324951171875, 54.6806640625, 5.0657806396484375, -10.782833099365234, 18.433670043945312, 75.72930908203125, -44.445587158203125, 14.221244812011719, 15.044706344604492, 61.106834411621094, 5.2836151123046875, 5.516387939453125, 45.684356689453125, 12.567825317382812, -39.895538330078125, 97.5058364868164, 45.676414489746094, -11.50328254699707, 10.694869995117188, 24.881439208984375, 4.356803894042969, 39.328948974609375, 58.27802276611328, 63.593505859375, 48.32330322265625, 39.44007110595703, 36.77105712890625, 43.00458526611328, 31.707828521728516, 29.766494750976562, 9.630184173583984, 24.501800537109375, 17.256057739257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000258.npy"} +{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 23.946025848388672, "std": 21.142629623413086, "min": -27.04534149169922, "p10": 0.5028085708618171, "median": 19.7800350189209, "p90": 48.131640625, "max": 76.01190185546875, "pos_frac": 0.90625, "sample": [44.65584182739258, -6.615211486816406, 37.97248077392578, 47.66747283935547, 14.546180725097656, -27.04534149169922, 48.23768615722656, 47.88420104980469, 45.83465576171875, 31.22763442993164, 6.406124114990234, 15.08935546875, 30.831764221191406, 13.796279907226562, 13.824554443359375, 10.734184265136719, 23.64899444580078, 5.575927734375, 69.71891784667969, 41.98711395263672, 36.615943908691406, 6.3838348388671875, 6.42242431640625, 24.369171142578125, 34.78126525878906, 4.550693511962891, 18.392635345458984, 13.23086166381836, 76.01190185546875, 1.1513214111328125, 13.426055908203125, 8.674427032470703, 72.895263671875, 43.826805114746094, -0.6759872436523438, 20.4097900390625, 35.843177795410156, 60.61768341064453, -1.3183517456054688, 60.19525909423828, 13.034919738769531, 3.3223419189453125, 41.69349670410156, 24.821590423583984, 20.615074157714844, 5.604545593261719, 41.079864501953125, 37.183929443359375, -3.5710906982421875, 23.23058319091797, -0.23975563049316406, 36.565940856933594, 60.47661590576172, 10.664291381835938, 33.20196533203125, 0.22487449645996094, 9.232118606567383, 39.82518768310547, 19.150279998779297, 5.288787841796875, 15.080394744873047, 5.924427032470703, 24.93030548095703, 13.421897888183594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000259.npy"} +{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 27.156875610351562, "std": 25.070676803588867, "min": -13.670892715454102, "p10": 1.2839290618896493, "median": 24.836669921875, "p90": 59.13706855773926, "max": 100.586181640625, "pos_frac": 0.90625, "sample": [24.796180725097656, -11.734926223754883, -0.9280929565429688, 2.403654098510742, 47.25763702392578, 25.467487335205078, 43.75559997558594, 48.61700439453125, 28.045654296875, 10.379310607910156, 13.16855239868164, 46.220767974853516, 13.52780532836914, 21.194580078125, 30.400291442871094, 100.586181640625, 27.453588485717773, 17.20415496826172, 7.0007171630859375, 60.146881103515625, -13.670892715454102, 12.583724975585938, 16.79184913635254, 58.9539909362793, -3.7187957763671875, 7.294303894042969, 40.41019821166992, 4.876983642578125, 32.63496017456055, 41.36410903930664, -0.20801544189453125, 56.50685119628906, 38.40277862548828, 44.56053161621094, 35.95415115356445, 52.55778884887695, 95.45592498779297, 24.91718292236328, 0.9141120910644531, 17.933013916015625, -11.455745697021484, 27.861711502075195, 37.47882843017578, 4.8930206298828125, 35.80586242675781, 2.2604808807373047, 9.566421508789062, 12.829124450683594, 37.41920471191406, 33.96879577636719, 31.12860107421875, 9.437088012695312, 2.1468353271484375, 85.46257019042969, 24.877159118652344, 13.19818115234375, 3.77471923828125, 23.294418334960938, 5.693532943725586, 76.4311752319336, 14.784408569335938, 68.54438018798828, 9.945987701416016, 59.21553039550781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000260.npy"} +{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 28.690322875976562, "std": 26.597740173339844, "min": -11.97327995300293, "p10": -2.721918106079101, "median": 23.389793395996094, "p90": 64.23923873901367, "max": 137.51593017578125, "pos_frac": 0.875, "sample": [77.14154052734375, 33.816551208496094, 15.235408782958984, -2.3847808837890625, 16.942913055419922, 137.51593017578125, 12.532979965209961, 70.07997131347656, 5.551750183105469, 25.68383026123047, 22.532312393188477, 10.508220672607422, 1.633209228515625, 68.57516479492188, 8.486564636230469, 29.69776153564453, 14.893062591552734, 32.759857177734375, 23.275375366210938, -6.745927810668945, 15.401092529296875, 26.646438598632812, 17.47600746154785, 18.209861755371094, 38.57355499267578, 60.89281463623047, 46.22935485839844, 20.593826293945312, 64.5863265991211, 16.357994079589844, -6.107898712158203, 23.50421142578125, 5.282907485961914, 36.68960952758789, 25.875274658203125, -4.452795028686523, 72.62271118164062, 31.15247344970703, 49.53208923339844, 19.998367309570312, 79.66571044921875, 32.11263656616211, 27.098373413085938, 63.42936706542969, 16.754484176635742, 46.20893859863281, -6.160663604736328, 44.56559371948242, 8.487838745117188, 15.139122009277344, 50.03070068359375, 51.64994812011719, 7.963081359863281, -2.866405487060547, -11.97327995300293, 34.07638931274414, 46.61088562011719, -4.918973922729492, 32.46720886230469, 35.248748779296875, 17.518577575683594, 14.632827758789062, 59.44978713989258, 2.1937637329101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000261.npy"} +{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 25.835268020629883, "std": 21.952381134033203, "min": -29.028244018554688, "p10": 0.23545761108398588, "median": 25.458702087402344, "p90": 49.429504013061525, "max": 91.57423400878906, "pos_frac": 0.890625, "sample": [25.939189910888672, 34.408843994140625, 21.642974853515625, 31.098649978637695, -6.4638519287109375, 35.90205383300781, 76.97607421875, 45.25791931152344, 7.3737335205078125, 21.234375, -10.239828109741211, -29.028244018554688, 8.69003677368164, 49.76109313964844, 11.160858154296875, -18.690895080566406, 35.1006965637207, 8.705192565917969, -6.125579833984375, 28.9678955078125, 27.950138092041016, 20.38301658630371, -0.39342498779296875, 34.44012451171875, 33.50856018066406, 16.697677612304688, 4.597833633422852, 41.89073181152344, 52.62711715698242, 44.58113098144531, 6.690349578857422, 12.201942443847656, 21.455718994140625, -6.2266387939453125, 37.04387664794922, 27.930946350097656, 25.875198364257812, 24.459640502929688, 23.99547576904297, 48.65579605102539, 62.386077880859375, 36.288780212402344, 33.607994079589844, 80.808837890625, 18.484590530395508, 53.078041076660156, 29.866287231445312, 8.443016052246094, 18.906936645507812, 16.804956436157227, 24.801925659179688, 29.990936279296875, 14.853225708007812, 91.57423400878906, 6.777435302734375, 1.702850341796875, 43.863441467285156, 48.105682373046875, 31.347774505615234, 43.856849670410156, 17.427980422973633, 19.525527954101562, 25.798324584960938, 25.11907958984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000262.npy"} +{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 30.601646423339844, "std": 26.217376708984375, "min": -13.791454315185547, "p10": 3.6238365173339853, "median": 26.123005867004395, "p90": 67.71236267089844, "max": 115.3145751953125, "pos_frac": 0.9375, "sample": [64.70477294921875, 1.7556915283203125, 23.03497314453125, 21.5521240234375, 30.057098388671875, 26.60704803466797, 39.920265197753906, 20.897323608398438, 3.203826904296875, 34.09933090209961, -7.23419189453125, 54.92939758300781, 14.526762008666992, 53.99580383300781, 31.266220092773438, 13.209259033203125, 8.595653533935547, 48.72242736816406, 5.926216125488281, 13.82513427734375, -6.752740859985352, 4.879219055175781, 22.911407470703125, 73.51509094238281, -7.580423355102539, 15.175849914550781, 51.10760498046875, 25.63896369934082, 4.603858947753906, 16.306903839111328, 7.238273620605469, 4.634899139404297, 94.6734390258789, 6.849985122680664, 14.51846694946289, 69.41252136230469, 33.94602966308594, 15.945846557617188, 64.89310455322266, 13.16802978515625, 8.774890899658203, 34.050575256347656, 43.523895263671875, 37.261131286621094, 80.4212417602539, 41.89454650878906, 68.92061614990234, 15.130172729492188, 38.283111572265625, 42.722755432128906, 30.51923370361328, 55.76781463623047, 75.27815246582031, -13.791454315185547, 5.048957824707031, 47.23527526855469, 24.515213012695312, 10.825820922851562, 40.83152770996094, 39.68378448486328, 45.494903564453125, 115.3145751953125, 40.731117248535156, 1.3861236572265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000263.npy"} +{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 24.097835540771484, "std": 22.14469337463379, "min": -27.62053680419922, "p10": 0.9869392395019538, "median": 20.273197174072266, "p90": 62.068843078613284, "max": 72.20577239990234, "pos_frac": 0.90625, "sample": [39.58421325683594, 11.428398132324219, 10.91085433959961, 47.82855224609375, 30.957637786865234, -7.639137268066406, 4.522895812988281, 69.81009674072266, 13.331165313720703, 62.58177185058594, 7.167724609375, 12.098592758178711, 8.611848831176758, -1.9552078247070312, 31.36490249633789, 21.09064483642578, 70.97991943359375, 12.603927612304688, 27.32697296142578, 67.99324035644531, 19.74554443359375, 20.376220703125, 5.455535888671875, 23.53805923461914, 14.788154602050781, 36.557525634765625, 18.564064025878906, 24.303077697753906, 11.481147766113281, 25.499465942382812, 8.690017700195312, -7.295379638671875, 31.306997299194336, 47.24652099609375, 1.767608642578125, 58.186439514160156, 20.17017364501953, 5.750434875488281, 35.01184844970703, 49.47162628173828, 24.252086639404297, 18.06353759765625, 29.024383544921875, -0.5931472778320312, 9.211555480957031, 68.91850280761719, 35.98802185058594, -27.62053680419922, 11.151535034179688, 13.546592712402344, 35.74379348754883, 20.81863021850586, 19.74994659423828, 63.18309020996094, 4.815540313720703, 27.929126739501953, 35.637176513671875, 4.0746612548828125, -6.149578094482422, 60.87200927734375, 72.20577239990234, 22.851200103759766, 6.7210693359375, 0.6523666381835938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000264.npy"} +{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 25.292123794555664, "std": 25.837305068969727, "min": -18.707897186279297, "p10": -0.47680587768554616, "median": 20.27764129638672, "p90": 66.78379592895507, "max": 97.77685546875, "pos_frac": 0.890625, "sample": [1.9214935302734375, 97.77685546875, 72.16172790527344, 40.76377868652344, 23.990428924560547, -8.691644668579102, 21.390853881835938, 0.6749496459960938, 88.2987060546875, 29.219696044921875, 66.87370300292969, 2.4890518188476562, 5.961994171142578, 11.69247817993164, 0.2314453125, 26.11849594116211, 27.77947998046875, -18.707897186279297, 39.56800079345703, 25.701507568359375, 45.273406982421875, 77.12577819824219, -4.696624755859375, 83.42984008789062, -11.661941528320312, 52.616363525390625, 16.70355224609375, 66.57401275634766, 16.93399429321289, 80.26641082763672, 8.07796859741211, 13.847824096679688, 16.667943954467773, 22.226734161376953, 6.0147247314453125, 2.7953262329101562, 3.5895423889160156, 7.8822021484375, 46.20282745361328, 1.5479888916015625, 29.38011932373047, 13.608535766601562, 17.515066146850586, 32.78712463378906, 13.56637954711914, 8.889228820800781, -7.177986145019531, 31.797773361206055, 0.7993927001953125, 16.333724975585938, 19.1644287109375, -0.7803421020507812, 51.836181640625, 44.59911346435547, 31.16950225830078, 31.53774070739746, 37.97284698486328, 11.496711730957031, 25.082279205322266, -1.2561378479003906, 28.108224868774414, 14.251827239990234, 24.973182678222656, 36.40805435180664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000265.npy"} +{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 24.757902145385742, "std": 22.67148208618164, "min": -19.329666137695312, "p10": -1.4016380310058585, "median": 19.418917655944824, "p90": 57.422105407714845, "max": 71.33233642578125, "pos_frac": 0.859375, "sample": [17.18353271484375, 62.12779235839844, 19.562150955200195, 19.275684356689453, 38.773582458496094, 26.06238555908203, 1.9921722412109375, 54.5716552734375, 28.30291748046875, 70.54505920410156, 70.47845458984375, 57.20172119140625, 5.693046569824219, 46.53734588623047, -2.083251953125, 13.535621643066406, 39.541690826416016, 26.955535888671875, 39.07155990600586, 10.724746704101562, 5.523380279541016, -6.539569854736328, -19.329666137695312, 70.2994613647461, 6.7488555908203125, 46.73956298828125, -0.5797500610351562, 11.65985107421875, 0.8225364685058594, 39.655067443847656, 71.33233642578125, 29.51416778564453, 58.151123046875, 56.41935729980469, 45.29142761230469, 16.259613037109375, 35.95212936401367, 44.3775634765625, 14.755882263183594, 16.78504180908203, 25.222267150878906, 13.1553955078125, 22.1339054107666, 24.9276180267334, 40.21364974975586, 7.219600677490234, 1.7377395629882812, -1.753875732421875, 57.51655578613281, 49.035621643066406, -7.90869140625, 3.1707839965820312, 17.479217529296875, 10.557235717773438, -0.14726829528808594, 17.264968872070312, 15.91859245300293, 3.2954330444335938, -9.537788391113281, 35.13701629638672, 35.41181945800781, 26.240814208984375, -3.693826675415039, 12.0172119140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000266.npy"} +{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 27.410789489746094, "std": 22.108509063720703, "min": -7.691558837890625, "p10": 2.883445739746094, "median": 24.24208641052246, "p90": 54.281617736816415, "max": 103.44767761230469, "pos_frac": 0.921875, "sample": [34.284461975097656, 16.068519592285156, 27.056602478027344, 1.2364845275878906, 34.09747314453125, 5.147344589233398, 48.408721923828125, 35.132415771484375, 20.22138214111328, 17.934452056884766, 5.8787689208984375, -7.691558837890625, 3.371307373046875, 47.94303894042969, 15.225517272949219, 17.786521911621094, 13.290725708007812, 61.92235565185547, 51.772003173828125, 103.44767761230469, 17.27820587158203, 23.61646270751953, 38.98352813720703, -5.6884765625, 2.6743621826171875, 30.968923568725586, 36.31280517578125, 11.704339981079102, 62.68470764160156, 65.357421875, 27.72125244140625, 52.34503936767578, 64.10205078125, 33.793914794921875, 11.64048957824707, 16.599945068359375, -6.4454345703125, 15.284141540527344, 6.248119354248047, 43.564659118652344, 55.11157989501953, 6.768333435058594, 9.155706405639648, 85.35572814941406, -4.4759521484375, 22.6251220703125, 17.96905517578125, 36.862449645996094, 24.826324462890625, 32.7554931640625, 7.253192901611328, 30.895782470703125, -2.5548934936523438, 37.597312927246094, 14.373039245605469, 14.4100341796875, 25.135643005371094, 49.71121597290039, 17.177396774291992, 31.113937377929688, 46.661460876464844, 23.657848358154297, 44.02558898925781, 26.59837532043457], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000267.npy"} +{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 25.404464721679688, "std": 21.823596954345703, "min": -14.694854736328125, "p10": -0.30285148620605423, "median": 22.961944580078125, "p90": 53.16849060058595, "max": 98.21807861328125, "pos_frac": 0.890625, "sample": [17.401519775390625, 70.59181213378906, 27.742935180664062, 21.499839782714844, 37.033531188964844, 12.394126892089844, 39.72438049316406, -1.2128639221191406, 42.31010437011719, 32.47810363769531, 59.87532043457031, -12.353034973144531, 39.40596008300781, 18.68146514892578, 74.05056762695312, -5.960357666015625, 4.222007751464844, 16.28956413269043, -14.694854736328125, 50.902740478515625, 20.116676330566406, 41.62535095214844, 2.2751426696777344, 37.74357604980469, 19.47549057006836, 50.333099365234375, 26.6492919921875, 98.21807861328125, -0.4978485107421875, 38.720245361328125, -0.7190361022949219, 5.9841156005859375, 14.95229721069336, 9.237388610839844, 58.126434326171875, 16.773948669433594, 27.7004337310791, 35.546363830566406, 23.207366943359375, 46.12998962402344, 23.37092399597168, 42.68060302734375, 17.30614471435547, 0.15214157104492188, 40.41558837890625, 17.364654541015625, 28.200286865234375, 36.62193298339844, 8.905441284179688, 24.217613220214844, 15.069129943847656, 22.716522216796875, 57.22441101074219, 26.174707412719727, 9.776311874389648, 13.907623291015625, 40.11912536621094, 6.655418395996094, 54.1395263671875, 23.743513107299805, 2.8272705078125, -9.395263671875, 2.086498260498047, 19.624267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000268.npy"} +{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 32.81420135498047, "std": 23.66851234436035, "min": -9.059440612792969, "p10": -1.9296653747558592, "median": 34.81541061401367, "p90": 63.82899703979493, "max": 81.58445739746094, "pos_frac": 0.875, "sample": [32.460693359375, 72.91352844238281, 29.893020629882812, 14.903450012207031, 12.577384948730469, 32.28845977783203, 53.20514678955078, 26.081249237060547, 35.113624572753906, 0.45900726318359375, 40.40126419067383, -2.0426025390625, 59.04075622558594, 40.406402587890625, 35.10608673095703, 45.215576171875, 40.877227783203125, 14.565628051757812, 19.368911743164062, -6.355400085449219, 45.046875, 29.01424789428711, 40.65345001220703, -8.331737518310547, 16.00674819946289, 35.72052001953125, 34.52473449707031, 48.01289367675781, 81.58445739746094, 54.01800537109375, 38.82213592529297, 19.089752197265625, 67.23635864257812, 51.99740982055664, -4.85577392578125, 53.02876281738281, 62.52947998046875, 41.15204620361328, 53.746063232421875, 31.31585693359375, 1.3419837951660156, 35.47694396972656, 7.579254150390625, -5.2621917724609375, 24.128803253173828, 64.45791625976562, 48.59710693359375, 52.142791748046875, 61.165191650390625, 8.705413818359375, 51.198204040527344, 11.646102905273438, 24.546615600585938, 64.38593292236328, 6.419164657592773, 81.3481216430664, 25.53264617919922, -9.059440612792969, -1.6661453247070312, 18.711692810058594, 51.28407287597656, 68.6515884399414, -6.161827087402344, 28.147232055664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000269.npy"} +{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 25.06671142578125, "std": 22.241985321044922, "min": -13.552497863769531, "p10": 1.032383155822754, "median": 21.128607749938965, "p90": 57.74253692626953, "max": 83.53317260742188, "pos_frac": 0.90625, "sample": [25.29145050048828, 49.98212432861328, 60.32280731201172, -9.736665725708008, -8.443367004394531, 20.713043212890625, 25.510433197021484, 21.63562774658203, 33.43693542480469, 59.436126708984375, 57.327125549316406, 10.141830444335938, 17.42125701904297, 17.762908935546875, 13.736289978027344, 6.158958435058594, 22.14244842529297, 16.70757293701172, 13.886787414550781, 29.886810302734375, 58.476959228515625, 18.177967071533203, 17.82117462158203, 48.79023742675781, 52.0050048828125, 32.20674133300781, 1.192556381225586, 9.772438049316406, 28.288604736328125, 23.58264923095703, -13.552497863769531, 54.879478454589844, 79.95649719238281, 0.9637374877929688, 8.780586242675781, 35.373321533203125, 36.19830322265625, 8.192087173461914, 22.742691040039062, 41.627376556396484, 83.53317260742188, 6.9471588134765625, 23.4072265625, -7.655735015869141, 13.300666809082031, 21.544172286987305, 5.683372497558594, 50.193878173828125, 8.769317626953125, 15.109066009521484, 29.548782348632812, 8.627296447753906, 7.053466796875, -3.112092971801758, 14.095001220703125, 24.92767333984375, 6.543373107910156, 19.25641632080078, -4.127040863037109, 57.920570373535156, 43.700233459472656, 70.39450073242188, 6.908557891845703, 52.904083251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000270.npy"} +{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 25.931888580322266, "std": 23.311038970947266, "min": -20.83026885986328, "p10": 0.8072496414184573, "median": 22.65183448791504, "p90": 63.80620422363282, "max": 79.33973693847656, "pos_frac": 0.9375, "sample": [72.15875244140625, 34.0384521484375, 34.214820861816406, 8.855491638183594, 31.994400024414062, 23.710674285888672, 41.16450500488281, 53.534027099609375, 30.35863494873047, -1.4113235473632812, 28.539749145507812, -19.013473510742188, 64.443603515625, 21.862838745117188, 8.179893493652344, 4.1139373779296875, 32.59498596191406, 22.888931274414062, 10.700187683105469, 2.2251815795898438, 1.090047836303711, 11.240280151367188, 55.536285400390625, 51.60809326171875, 65.04513549804688, 59.52386474609375, 26.053741455078125, 11.416147232055664, 21.562353134155273, 35.307960510253906, -6.151464462280273, 0.6860504150390625, 16.930713653564453, 7.7012176513671875, 3.685821533203125, 36.70769500732422, 0.34384918212890625, 0.5927753448486328, 7.4311981201171875, 11.54275894165039, 20.188880920410156, 79.33973693847656, 22.414737701416016, 3.446268081665039, 34.59203338623047, 11.684429168701172, 27.005889892578125, 25.713623046875, 66.71514892578125, 36.21516418457031, 71.32414245605469, 21.197715759277344, 10.666740417480469, 1.4350128173828125, 62.318939208984375, -20.83026885986328, 41.715087890625, 72.1309585571289, 30.630271911621094, 45.94264602661133, 5.167152404785156, 34.44013977050781, 15.465309143066406, 17.712379455566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000271.npy"} +{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 31.53685760498047, "std": 27.45886993408203, "min": -29.17863655090332, "p10": -3.6873443603515614, "median": 31.681678771972656, "p90": 67.66065368652345, "max": 112.99320983886719, "pos_frac": 0.859375, "sample": [31.70777130126953, 4.961082458496094, 19.824926376342773, -8.124610900878906, 9.972206115722656, 65.50131225585938, 9.1409912109375, 62.99524688720703, 73.58348083496094, 34.58984375, 61.622474670410156, 60.28224182128906, -2.4966888427734375, 22.02768325805664, -29.17863655090332, 17.256074905395508, 9.486648559570312, 35.090240478515625, 17.851531982421875, 36.18865966796875, -6.8644256591796875, 52.60939025878906, 40.93701171875, 42.26239013671875, 21.189834594726562, 15.555923461914062, 6.045011520385742, 81.1505126953125, 30.429466247558594, 112.99320983886719, 31.1412353515625, 35.88232421875, 17.56741714477539, 37.692962646484375, 11.349634170532227, 40.499839782714844, 67.8832778930664, -6.183006286621094, 36.69297790527344, 31.65558624267578, 70.87173461914062, 19.106597900390625, 47.14544677734375, 30.157094955444336, 47.103851318359375, 4.978767395019531, 60.13812255859375, 11.986907958984375, 71.50845336914062, 56.048667907714844, -15.130363464355469, 33.779109954833984, -4.1627044677734375, -6.486228942871094, -2.5781707763671875, 65.07135009765625, 31.2357234954834, 41.22489547729492, 67.14119720458984, 5.023153305053711, 34.983802795410156, 9.211284637451172, 70.8763427734375, 36.350921630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000272.npy"} +{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 30.293479919433594, "std": 25.976314544677734, "min": -21.275039672851562, "p10": 3.9777818679809576, "median": 27.245826721191406, "p90": 66.43413619995118, "max": 104.26617431640625, "pos_frac": 0.9375, "sample": [54.60099792480469, 46.90477752685547, 4.7202911376953125, 34.38129425048828, 71.61015319824219, 104.26617431640625, 10.255950927734375, 20.53651237487793, 74.49371337890625, -1.1814117431640625, 44.94261932373047, 22.505722045898438, 40.31629180908203, 17.359827041625977, -7.96088981628418, 27.894485473632812, 3.693052291870117, 6.230022430419922, 31.252864837646484, 49.684410095214844, 44.28277587890625, 12.069803237915039, 12.9334716796875, 24.41863441467285, 5.803224563598633, 24.0953369140625, 43.135986328125, 9.22867202758789, 18.491989135742188, 35.83177947998047, 21.171417236328125, 8.968132019042969, 62.202674865722656, 72.41925048828125, 10.778142929077148, 76.97055053710938, 59.209625244140625, 31.592926025390625, 4.64215087890625, 50.36456298828125, -13.782646179199219, 27.47735595703125, 30.521682739257812, 6.264036178588867, 48.624603271484375, 45.46397399902344, 54.04327392578125, 15.42103385925293, 8.268486022949219, -21.275039672851562, 95.13349914550781, 5.812938690185547, 28.988182067871094, 27.014297485351562, 39.56731414794922, 68.24761962890625, 11.941299438476562, 3.421894073486328, 0.351776123046875, 12.733123779296875, 51.5084228515625, 8.970321655273438, 47.048736572265625, 51.898529052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000273.npy"} +{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 26.310405731201172, "std": 23.380788803100586, "min": -27.78002166748047, "p10": -0.22514152526855435, "median": 23.027732849121094, "p90": 62.186992645263686, "max": 85.86000061035156, "pos_frac": 0.890625, "sample": [24.165502548217773, 4.428655624389648, 9.484870910644531, -1.256134033203125, 41.164154052734375, 25.70305633544922, 26.732948303222656, -0.362030029296875, 6.810455322265625, 14.35443115234375, 19.86908531188965, 57.92694854736328, 65.65574645996094, 39.56062316894531, 28.63452911376953, 24.262237548828125, 34.66749572753906, 55.47050476074219, -9.763893127441406, 31.95049285888672, 32.56658172607422, 29.835987091064453, 17.2908935546875, 37.11516189575195, -27.78002166748047, 54.42103576660156, 70.4631118774414, 4.117706298828125, 9.881729125976562, 85.86000061035156, 0.09426498413085938, 22.25469970703125, 14.71529769897461, 13.062477111816406, 18.33378028869629, 43.33271789550781, 76.5072021484375, 19.279647827148438, 54.20478820800781, -1.564544677734375, 72.06402587890625, 12.367317199707031, 33.59979248046875, 18.552570343017578, 48.898704528808594, 1.4120635986328125, 26.184860229492188, 20.349302291870117, 18.306720733642578, 69.81637573242188, 33.35649108886719, 3.9633026123046875, 1.0025749206542969, 22.339988708496094, 0.9286117553710938, 35.949737548828125, -0.9976119995117188, -3.6458377838134766, 23.715476989746094, 26.024124145507812, 16.297866821289062, 49.318206787109375, 16.596481323242188, 64.01272583007812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000274.npy"} +{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 27.70269775390625, "std": 24.43610954284668, "min": -12.729217529296875, "p10": 0.024624252319336737, "median": 24.688678741455078, "p90": 59.178755187988294, "max": 101.51547241210938, "pos_frac": 0.890625, "sample": [46.93296813964844, 19.841800689697266, 26.452510833740234, 9.012855529785156, 7.557180404663086, 11.201332092285156, 61.39178466796875, 14.333137512207031, 82.439208984375, 12.870922088623047, 31.855361938476562, 49.428165435791016, 76.53228759765625, 33.145965576171875, 14.319602966308594, 42.917266845703125, -0.3014030456542969, 14.356338500976562, 5.8685455322265625, 32.44673156738281, 7.703041076660156, 39.581764221191406, 101.51547241210938, 5.200706481933594, 21.02581787109375, 5.454261779785156, 26.16167449951172, 41.37837219238281, 55.784637451171875, 24.356788635253906, -2.924112319946289, 7.8000946044921875, 40.73353576660156, 28.654434204101562, 31.24763298034668, 8.707229614257812, -3.0793380737304688, 16.178817749023438, -0.4181556701660156, 10.558250427246094, 25.322402954101562, 18.96405029296875, -2.7078933715820312, 4.976314544677734, 43.35947799682617, 45.11488342285156, 44.78546142578125, 84.48091125488281, 47.021995544433594, 79.42434692382812, 49.233970642089844, -12.614561080932617, 44.47108459472656, 60.63337707519531, 0.7853546142578125, 31.304595947265625, 21.51667022705078, 39.0224494934082, 3.5300064086914062, 21.92562484741211, 18.13029670715332, 33.77692413330078, -12.729217529296875, 25.02056884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000275.npy"} +{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 25.469282150268555, "std": 24.726295471191406, "min": -15.752334594726562, "p10": -2.4194774627685547, "median": 22.903494834899902, "p90": 60.613398742675784, "max": 88.23272705078125, "pos_frac": 0.8125, "sample": [25.243629455566406, 88.23272705078125, 30.29239273071289, 50.563751220703125, 67.23814392089844, 30.986663818359375, -2.4665298461914062, 65.28619384765625, 4.04823112487793, 38.435970306396484, 7.08256721496582, 17.854835510253906, -2.1160202026367188, 22.886188507080078, 32.044776916503906, 35.2022705078125, -9.827133178710938, -1.7466049194335938, 7.9458465576171875, 11.359180450439453, 5.579643249511719, 17.783370971679688, 7.627769470214844, 46.320526123046875, 14.126083374023438, -2.3096885681152344, 3.3241615295410156, 34.28201675415039, 22.920801162719727, 37.37627410888672, -8.292961120605469, 19.739253997802734, 30.037208557128906, 35.59776306152344, 52.58741760253906, 16.928138732910156, 12.041580200195312, -3.847686767578125, -1.2893199920654297, 17.335254669189453, 28.291101455688477, 10.026351928710938, 0.9770164489746094, 61.26026916503906, 48.95280456542969, -2.9015769958496094, 43.891845703125, 32.35105895996094, 49.45192337036133, 27.231807708740234, 62.40791320800781, 5.961460113525391, 58.332664489746094, 32.58086395263672, -1.6673049926757812, 56.18377685546875, 24.984445571899414, 5.350929260253906, 59.104034423828125, -7.0157623291015625, -15.752334594726562, 77.5628662109375, 14.88133430480957, 81.17184448242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000276.npy"} +{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 24.34532928466797, "std": 19.181516647338867, "min": -7.5345458984375, "p10": 3.687131690979004, "median": 19.123876571655273, "p90": 52.39681053161621, "max": 82.7918701171875, "pos_frac": 0.9375, "sample": [11.456230163574219, 28.641056060791016, 3.5633506774902344, 19.697906494140625, 12.278209686279297, 36.22100067138672, 35.2236328125, 30.26581382751465, 7.1704864501953125, 38.412841796875, 25.347259521484375, 7.754796981811523, 42.625465393066406, -1.7754135131835938, 68.79217529296875, 8.429031372070312, 3.975954055786133, 11.108383178710938, 35.806373596191406, 14.519355773925781, 17.362979888916016, 29.885726928710938, 12.451362609863281, 35.131385803222656, -7.5345458984375, 26.381683349609375, 57.14491271972656, 41.417022705078125, 9.793174743652344, 24.146270751953125, -0.44715118408203125, 82.7918701171875, 10.424606323242188, 53.45660400390625, 7.942512512207031, 7.890438079833984, 49.11711883544922, 35.75077819824219, 16.220348358154297, 8.433746337890625, 38.82197570800781, 30.857704162597656, 58.850372314453125, 13.122905731201172, 1.9441909790039062, 18.549846649169922, 17.839263916015625, 52.415836334228516, 32.03565979003906, 38.88933563232422, -2.2039794921875, 45.22306823730469, 26.017847061157227, 1.9530391693115234, 10.929161071777344, 4.065105438232422, 19.906150817871094, 14.911758422851562, 52.3524169921875, 14.873245239257812, 28.65604019165039, 59.21185302734375, 10.300256729125977, 11.30316162109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000277.npy"} +{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 29.894882202148438, "std": 26.637422561645508, "min": -7.059478759765625, "p10": -1.9712816238403312, "median": 24.126537322998047, "p90": 67.29287185668946, "max": 94.26127624511719, "pos_frac": 0.875, "sample": [7.785501480102539, 68.02751159667969, 1.5127067565917969, 23.238346099853516, 22.30810546875, 40.51075744628906, 23.056838989257812, 17.836627960205078, 57.47923278808594, 24.294998168945312, 6.20220947265625, 30.109298706054688, 32.57958984375, -2.280517578125, 63.96656799316406, 23.054046630859375, -2.7885093688964844, 65.5787124633789, 69.22421264648438, -5.2517242431640625, 19.615507125854492, 93.5772705078125, 55.888427734375, 21.07872772216797, 35.24510955810547, 58.16688537597656, 39.91254425048828, 1.8573970794677734, 23.95807647705078, 91.48892974853516, 71.63671112060547, -6.987937927246094, 32.768836975097656, -5.729461669921875, 62.204742431640625, 57.40221405029297, 7.08319091796875, 6.64801025390625, 1.7829418182373047, 48.48305892944336, 17.27700424194336, 94.26127624511719, 22.830785751342773, -7.059478759765625, 14.585189819335938, 71.77603912353516, 13.580986022949219, 29.906631469726562, 28.47890853881836, 42.42030334472656, 24.348979949951172, 24.45368766784668, 12.816486358642578, 3.2158126831054688, 36.52904510498047, 53.21549987792969, -3.0003490447998047, -1.2497310638427734, 52.480743408203125, 2.947509765625, 50.17509078979492, 35.52595520019531, 5.9857330322265625, 5.244565963745117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000278.npy"} +{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 29.428205490112305, "std": 24.46150016784668, "min": -15.758121490478516, "p10": -2.6836544036865226, "median": 25.190579414367676, "p90": 61.04395980834961, "max": 83.1346206665039, "pos_frac": 0.84375, "sample": [5.584712982177734, 11.74264907836914, 18.383487701416016, 27.325271606445312, -7.908416748046875, -4.574981689453125, 64.70304870605469, 59.63262939453125, 18.786911010742188, 5.552082061767578, -0.0669708251953125, 45.66423034667969, 36.82952117919922, 24.287519454956055, 69.5130615234375, 61.62812805175781, 5.808704376220703, 24.564788818359375, 59.68090057373047, 20.303878784179688, 35.073211669921875, 42.05718231201172, 14.620977401733398, -2.9467391967773438, 13.749210357666016, 25.575571060180664, 24.805587768554688, 38.70781707763672, -1.506195068359375, 83.1346206665039, 17.1292724609375, 44.283843994140625, 46.704345703125, 29.92267417907715, 44.25089645385742, 54.50499725341797, 15.514015197753906, 80.800048828125, 63.20375442504883, 47.613426208496094, 8.700538635253906, -3.6002731323242188, 5.146337509155273, -4.90655517578125, 14.934900283813477, 56.263763427734375, 20.334346771240234, 39.544776916503906, 40.074485778808594, 16.515369415283203, 52.25187683105469, 45.98731994628906, 77.4427490234375, -15.758121490478516, 57.23698425292969, 4.841835021972656, 42.672630310058594, 21.833023071289062, -3.7608070373535156, 6.095420837402344, 44.45280456542969, 43.40995788574219, -2.0697898864746094, 51.12186813354492], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000279.npy"} +{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 28.934520721435547, "std": 18.74156379699707, "min": -6.62939453125, "p10": 2.055599212646486, "median": 30.226879119873047, "p90": 50.63310089111329, "max": 76.24617004394531, "pos_frac": 0.90625, "sample": [26.580703735351562, 35.34193420410156, 14.526084899902344, 38.76576232910156, 14.613258361816406, 1.3856735229492188, 41.67793273925781, 34.73548889160156, 53.092132568359375, 7.38140869140625, 37.14421081542969, 23.202789306640625, 10.90420913696289, 34.26268768310547, 51.03028869628906, 44.01527404785156, 9.676168441772461, 76.24617004394531, 25.847999572753906, 34.44377136230469, -0.9027538299560547, 66.05604553222656, 60.9434700012207, 38.1644172668457, 43.506141662597656, 22.529582977294922, 27.939743041992188, 37.48335266113281, 5.60986328125, 52.38963317871094, 49.706329345703125, 34.696495056152344, 39.181365966796875, 35.91777801513672, -4.045654296875, 14.282835006713867, 26.269912719726562, 71.18549346923828, 19.925552368164062, 48.11559295654297, 28.707138061523438, 3.6187591552734375, -6.5164794921875, 30.751617431640625, -2.276700973510742, -6.62939453125, 23.37335205078125, 35.613746643066406, 15.400711059570312, 35.56690979003906, 39.15865707397461, 46.841796875, 31.12880516052246, 28.020734786987305, -0.4383049011230469, 6.722282409667969, 19.53863525390625, 24.615707397460938, 25.875946044921875, 42.180938720703125, 44.99161911010742, 12.909683227539062, 39.12194061279297, 29.70214080810547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000280.npy"} +{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 25.293540954589844, "std": 20.211898803710938, "min": -41.6973876953125, "p10": 3.5543533325195327, "median": 22.929208755493164, "p90": 50.643093872070324, "max": 82.52790832519531, "pos_frac": 0.9375, "sample": [15.870079040527344, 68.3391342163086, 26.05425262451172, 40.823272705078125, 21.858882904052734, 9.239913940429688, 10.048812866210938, 21.674175262451172, 29.41005516052246, 29.537065505981445, 29.555397033691406, 45.8209228515625, 41.69766616821289, 9.867616653442383, 35.29371643066406, -4.5254974365234375, 31.998809814453125, 47.66998291015625, 11.997840881347656, 20.764602661132812, 34.04314422607422, 20.340057373046875, 31.843704223632812, 33.991920471191406, 15.601806640625, 23.999534606933594, 41.16786193847656, 63.808448791503906, -3.8612403869628906, 46.77635955810547, 51.775779724121094, 13.241357803344727, 26.557889938354492, 55.08457946777344, 13.918014526367188, -41.6973876953125, 17.592723846435547, 16.34920883178711, 37.36628723144531, 12.272571563720703, 82.52790832519531, 14.701522827148438, 5.2716217041015625, 24.28582000732422, 30.473003387451172, 59.244606018066406, 12.7633056640625, 17.275527954101562, 44.26875305175781, 39.05131530761719, 10.291160583496094, 26.334564208984375, -2.9491729736328125, 2.9751129150390625, 4.905914306640625, 15.708786010742188, 0.48482704162597656, 52.926116943359375, 29.401412963867188, 11.52186393737793, 14.035318374633789, 20.02996826171875, 2.0878353118896484, 48.000160217285156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000281.npy"} +{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 23.41071128845215, "std": 20.938098907470703, "min": -7.799657821655273, "p10": 2.514866828918457, "median": 22.222238540649414, "p90": 47.915639877319336, "max": 117.37347412109375, "pos_frac": 0.921875, "sample": [47.8800048828125, 6.143756866455078, 11.262611389160156, 6.712242126464844, 3.8667354583740234, 54.040626525878906, 25.3247127532959, 21.48503875732422, 7.822479248046875, 30.65453338623047, 47.930912017822266, 8.059188842773438, 64.7446060180664, 18.327180862426758, 6.456031799316406, 3.2026329040527344, 117.37347412109375, 17.492889404296875, 11.665023803710938, 25.14141845703125, 13.7554931640625, 29.705245971679688, 22.737743377685547, -0.552764892578125, 49.51835632324219, 58.83075714111328, 32.43724060058594, 65.28611755371094, 39.772464752197266, 31.292556762695312, 40.57150650024414, 0.5631599426269531, 32.31621551513672, -6.948308944702148, 24.26980209350586, 27.972782135009766, 2.5384521484375, 23.90148162841797, 13.090744018554688, 19.03350830078125, -7.51641845703125, 25.86650848388672, 44.47351837158203, 30.491348266601562, 43.45787048339844, 15.630325317382812, 5.204498291015625, 25.537532806396484, 28.933372497558594, 12.947898864746094, 2.504758834838867, 23.861427307128906, 25.64655303955078, 21.70673370361328, 22.79743194580078, 5.48150634765625, 18.4498291015625, 13.044347763061523, -7.799657821655273, 39.60932159423828, 9.796714782714844, 11.061355590820312, -2.2293777465820312, 33.64945983886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000282.npy"} +{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 28.11335563659668, "std": 23.548233032226562, "min": -20.06414794921875, "p10": 1.1320089340209976, "median": 23.23712158203125, "p90": 59.86843490600587, "max": 92.87501525878906, "pos_frac": 0.90625, "sample": [8.051921844482422, 15.462547302246094, 29.424713134765625, 2.5974082946777344, 22.425491333007812, 68.64222717285156, 16.577117919921875, 25.639801025390625, 81.92022705078125, -6.277702331542969, -2.8098068237304688, 42.04200744628906, -16.724796295166016, 19.265655517578125, 10.618026733398438, 11.116004943847656, -20.06414794921875, 18.10580825805664, 19.755722045898438, 7.623889923095703, 92.87501525878906, 54.31073760986328, 14.616539001464844, 52.00151062011719, 20.438396453857422, 24.048751831054688, -0.8374500274658203, 57.91706848144531, 21.00098419189453, 64.67784118652344, 22.363189697265625, 32.044700622558594, 43.324134826660156, 21.275039672851562, 8.56903076171875, 5.828086853027344, 42.989471435546875, 10.15439224243164, 10.735322952270508, 0.5039806365966797, 25.771507263183594, 56.42485046386719, 45.43523406982422, 15.572637557983398, 43.61298370361328, 28.744831085205078, 39.098365783691406, 20.17768096923828, 53.89324951171875, 28.422874450683594, 43.668365478515625, 24.237648010253906, 44.98359680175781, 16.406375885009766, 15.081184387207031, 46.78902053833008, 35.19001770019531, 28.29975128173828, 31.316116333007812, 65.4822998046875, 75.17605590820312, 13.000015258789062, 60.704734802246094, -10.463541030883789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000283.npy"} +{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 29.832664489746094, "std": 25.786720275878906, "min": -16.82473373413086, "p10": 2.822159576416017, "median": 24.1427001953125, "p90": 65.36373291015626, "max": 116.6048583984375, "pos_frac": 0.921875, "sample": [-2.51507568359375, 15.300451278686523, 44.109161376953125, 18.48926544189453, 23.835983276367188, 7.5369110107421875, 46.750587463378906, 19.041015625, 19.806785583496094, 56.08306884765625, 77.04557800292969, 11.31417465209961, 72.87619018554688, 10.415185928344727, 21.386802673339844, 11.48154067993164, 19.216686248779297, 32.848114013671875, 25.781333923339844, 84.88404846191406, -0.780517578125, 83.08666229248047, 33.117408752441406, 49.660888671875, 16.893985748291016, 23.97838592529297, 37.54359436035156, 36.449485778808594, 40.24742126464844, -4.530113220214844, 0.6583099365234375, 116.6048583984375, 14.760881423950195, 5.77281379699707, 2.2793502807617188, 41.188636779785156, -0.4029693603515625, 21.998313903808594, 24.30701446533203, 91.96270751953125, 65.53076171875, 9.84478759765625, 33.3004150390625, 24.7862606048584, 6.961235046386719, -16.82473373413086, 23.60240936279297, 64.9739990234375, 15.497283935546875, 26.93720817565918, 56.72218322753906, 26.332542419433594, 5.150230407714844, 50.16316223144531, 30.56112289428711, 10.911062240600586, 19.312301635742188, 4.088714599609375, 49.14697265625, 43.45785903930664, 6.949832916259766, 45.668182373046875, 31.11215591430664, 24.619674682617188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000284.npy"} +{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 26.457977294921875, "std": 21.147918701171875, "min": -11.735397338867188, "p10": 2.6115470886230474, "median": 24.644168853759766, "p90": 49.71286468505861, "max": 94.62911987304688, "pos_frac": 0.953125, "sample": [38.514320373535156, 23.131061553955078, 13.552196502685547, 45.744720458984375, 9.878952026367188, 25.775970458984375, 36.977134704589844, 6.5964813232421875, 44.64468002319336, 34.194244384765625, 38.662078857421875, 32.47928237915039, 39.29377746582031, 6.766761779785156, 36.424095153808594, 77.13966369628906, 5.751472473144531, 2.370513916015625, 60.246307373046875, 61.536224365234375, 29.698387145996094, 7.407249450683594, 16.60192108154297, 28.227256774902344, 34.76154327392578, 8.706413269042969, 17.559490203857422, 38.65263366699219, 22.964202880859375, 3.2066726684570312, 33.174110412597656, 0.13791656494140625, -7.522743225097656, 9.953998565673828, 40.01911926269531, 77.57769775390625, 15.38571548461914, -11.735397338867188, 32.93841552734375, 8.09326171875, 21.733732223510742, 5.588775634765625, 94.62911987304688, 35.001495361328125, 0.25093841552734375, 59.02497863769531, 33.087364196777344, 20.987834930419922, 40.112876892089844, 51.41349792480469, 3.1739578247070312, 7.317173004150391, 25.384963989257812, 17.56048583984375, 1.49395751953125, 11.291202545166016, 42.85215759277344, -1.0024642944335938, 19.46558380126953, 18.369064331054688, 28.31035614013672, 44.29877471923828, 43.57347106933594, 23.90337371826172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000285.npy"} +{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 27.725364685058594, "std": 25.633079528808594, "min": -27.53948211669922, "p10": -1.8357982635498047, "median": 25.327802658081055, "p90": 63.56953430175781, "max": 109.6821517944336, "pos_frac": 0.859375, "sample": [14.88812255859375, -0.89605712890625, 109.6821517944336, 25.658153533935547, 14.345996856689453, 33.22942352294922, 8.185127258300781, 17.891006469726562, 37.284202575683594, 11.231475830078125, 38.973880767822266, 43.351470947265625, 9.54583740234375, 8.354990005493164, 32.45000457763672, 8.747268676757812, 72.82965087890625, 1.5756759643554688, 24.507781982421875, 46.17564010620117, 80.49715423583984, 14.721492767333984, -27.53948211669922, 68.19483947753906, 24.805068969726562, -14.650360107421875, -1.8462409973144531, 45.38847351074219, 20.24078369140625, 54.27162170410156, 63.782257080078125, 3.4448089599609375, 30.73078155517578, 48.0643310546875, 24.997451782226562, 47.0609245300293, 31.683761596679688, 52.32743835449219, 9.149856567382812, 2.7277984619140625, 35.87110900878906, 12.688468933105469, 33.283447265625, -4.8521881103515625, -3.641143798828125, 28.694156646728516, 58.026527404785156, 23.00705909729004, -2.7880802154541016, 71.39314270019531, 4.640697479248047, 23.049880981445312, 2.506959915161133, 26.87957763671875, 33.59052276611328, 63.07318115234375, -3.906097412109375, 28.469745635986328, 23.21652603149414, -1.811431884765625, 61.13905334472656, 65.24282836914062, 26.346954345703125, 34.23784637451172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000286.npy"} +{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 26.86972427368164, "std": 25.97897720336914, "min": -17.612552642822266, "p10": 1.4858345031738291, "median": 18.39208984375, "p90": 60.12953605651856, "max": 112.10391235351562, "pos_frac": 0.90625, "sample": [56.275352478027344, 18.3294677734375, 14.869543075561523, 6.812858581542969, 26.140409469604492, 12.449142456054688, 52.99696350097656, 87.89054870605469, 45.3304328918457, 11.095855712890625, 61.34242248535156, 24.934961318969727, 4.117837905883789, 18.4547119140625, 6.873048782348633, -0.8361968994140625, 46.13102722167969, 19.812240600585938, 51.45808410644531, 33.29139709472656, 9.597370147705078, 3.9255943298339844, 44.280067443847656, -12.177886962890625, 112.10391235351562, 14.018383026123047, 18.161779403686523, 25.786590576171875, 31.145416259765625, 1.0771408081054688, 27.7557373046875, 47.534698486328125, -2.6238632202148438, 3.5701446533203125, 15.526386260986328, 39.182464599609375, 13.097404479980469, 33.88426208496094, 6.643245697021484, 16.076522827148438, 7.634927749633789, 15.189537048339844, 3.8988113403320312, 19.64072608947754, 8.473464965820312, 40.17707061767578, 2.439453125, 8.122329711914062, 80.27330017089844, 18.163726806640625, -11.687057495117188, 11.978103637695312, 60.79005813598633, 46.35133361816406, 49.769683837890625, -1.9252185821533203, -17.612552642822266, 30.97671127319336, 5.452934265136719, 41.131141662597656, 71.69679260253906, 78.71151733398438, 45.09182357788086, 58.58831787109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000287.npy"} +{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 27.80404281616211, "std": 21.588449478149414, "min": -10.430191040039062, "p10": 0.9680755615234381, "median": 24.7489013671875, "p90": 58.68895874023438, "max": 77.08822631835938, "pos_frac": 0.90625, "sample": [42.943321228027344, 33.6964111328125, -9.830127716064453, 34.16639709472656, 43.471527099609375, 66.44615173339844, -4.200315475463867, 73.76081848144531, 7.517604827880859, 55.27589797973633, 11.292610168457031, 25.0731201171875, 2.991729736328125, 17.855979919433594, 16.712711334228516, 27.822792053222656, 40.30011749267578, 27.799301147460938, 10.86016845703125, 36.57148742675781, 21.87436294555664, 48.147308349609375, 24.4246826171875, 11.787864685058594, 39.86860275268555, 33.829017639160156, 64.9564208984375, 15.917535781860352, 21.295257568359375, 46.97882080078125, 15.378379821777344, 10.13333511352539, 45.285682678222656, 40.73048400878906, 40.348968505859375, 32.31626892089844, 16.371536254882812, 3.3149871826171875, 31.87244415283203, 10.110836029052734, -6.699821472167969, 77.08822631835938, 16.639026641845703, 32.19758605957031, -1.6305465698242188, 24.012733459472656, 22.300003051757812, 9.245361328125, 71.54544067382812, 23.423744201660156, 56.03919982910156, 45.3243408203125, 34.85813903808594, 1.5783538818359375, 13.963520050048828, 62.58198547363281, 31.063873291015625, -1.94403076171875, 10.909236907958984, 54.39569091796875, 0.7065277099609375, 59.82456970214844, -10.430191040039062, 16.99525260925293], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000288.npy"} +{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 30.28182601928711, "std": 29.362388610839844, "min": -13.456769943237305, "p10": 1.5122701644897463, "median": 24.460418701171875, "p90": 73.26582870483402, "max": 115.80284118652344, "pos_frac": 0.90625, "sample": [27.581722259521484, 24.37823486328125, 82.82879638671875, 93.83366394042969, 4.158674240112305, 55.04747009277344, 47.44062805175781, 10.270034790039062, 22.841262817382812, 76.89718627929688, 5.7475738525390625, 37.35865783691406, 29.660388946533203, 108.11963653564453, 62.498931884765625, 47.085750579833984, 34.75776672363281, 98.66001892089844, -0.45201873779296875, 41.016319274902344, 17.226125717163086, 3.4049434661865234, 33.385101318359375, 10.132152557373047, 2.454465866088867, 33.60587692260742, 47.602596282958984, 28.214065551757812, 13.844673156738281, 9.747987747192383, 46.37474060058594, 65.67088317871094, 41.799278259277344, 1.583749771118164, 5.784934997558594, 5.950447082519531, 115.80284118652344, 7.536670684814453, 1.4816360473632812, -4.008930206298828, 23.30004119873047, 52.90167236328125, 1.9136428833007812, 46.076148986816406, -4.140459060668945, 45.36558532714844, 76.52080535888672, 30.309778213500977, 18.597230911254883, -8.476062774658203, 11.438827514648438, 24.5426025390625, -13.456769943237305, 4.088552474975586, 47.148193359375, 19.366485595703125, 7.157197952270508, 12.285484313964844, 2.6164398193359375, 38.55750274658203, 51.950103759765625, -3.0475921630859375, 39.34112548828125, 16.35541534423828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000289.npy"} +{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 31.53160858154297, "std": 29.577234268188477, "min": -17.622474670410156, "p10": -0.9667409896850552, "median": 27.486618041992188, "p90": 65.35994110107423, "max": 134.90945434570312, "pos_frac": 0.890625, "sample": [39.851898193359375, 27.346088409423828, 28.85039520263672, 22.21839141845703, -14.391975402832031, 26.575775146484375, 71.28742980957031, -4.651008605957031, 25.350112915039062, 9.598655700683594, 41.453941345214844, 57.676612854003906, 15.449005126953125, 9.790557861328125, 29.762168884277344, 45.83139419555664, 58.73046875, 28.93714141845703, 13.182159423828125, 28.002918243408203, 5.123687744140625, 35.09388732910156, -17.622474670410156, 40.593814849853516, 8.961524963378906, -14.289924621582031, 13.722515106201172, 27.02367401123047, 92.247802734375, 93.97775268554688, 2.404672622680664, 84.27133178710938, 41.79753112792969, 53.91371154785156, 38.16654968261719, -5.543405532836914, 58.828277587890625, 8.427810668945312, 30.15900421142578, 14.673233032226562, 18.5128173828125, 39.345848083496094, 28.117233276367188, 44.78582763671875, 8.703117370605469, 61.89875030517578, 18.14537811279297, 4.433937072753906, 13.818412780761719, -2.411632537841797, 64.94783020019531, 65.53656005859375, 16.757783889770508, 55.803131103515625, 8.638023376464844, 90.2716064453125, 59.93951416015625, 27.627147674560547, 6.880283355712891, -4.545894622802734, 4.869037628173828, 134.90945434570312, 60.892913818359375, 17.362831115722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000290.npy"} +{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 27.701282501220703, "std": 22.96601104736328, "min": -23.757659912109375, "p10": 3.5265132904052745, "median": 25.02838134765625, "p90": 62.42000961303711, "max": 78.1318359375, "pos_frac": 0.953125, "sample": [7.079593658447266, 23.017845153808594, 26.9573974609375, 30.256738662719727, 1.08746337890625, 61.23486328125, 78.1318359375, 34.704994201660156, 17.176467895507812, 62.49060821533203, 22.04608154296875, 50.64453125, 67.11943817138672, 9.703453063964844, 41.381568908691406, 18.838714599609375, 64.38252258300781, 47.50257873535156, 19.247220993041992, 20.285171508789062, 26.5379638671875, 12.931510925292969, 31.320287704467773, 10.862640380859375, 49.539947509765625, 62.255279541015625, 13.89459228515625, 69.32884216308594, 35.577598571777344, 1.48162841796875, 20.487716674804688, 9.232742309570312, 47.15741729736328, 2.9619293212890625, 28.653182983398438, -20.13592529296875, 7.238960266113281, 28.48046875, -23.757659912109375, 30.893157958984375, 7.26068115234375, 47.385589599609375, 51.84901428222656, 32.04357147216797, 9.772586822509766, 2.2553043365478516, -9.117986679077148, 42.94291687011719, 9.17452621459961, 32.26427459716797, 4.843875885009766, 10.86773681640625, 76.02719116210938, 19.01476287841797, 5.907127380371094, 29.6837158203125, 38.002777099609375, 26.178802490234375, 59.4681396484375, 20.921783447265625, 11.719413757324219, 23.877960205078125, 65.97709655761719, 6.329751968383789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000291.npy"} +{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 29.846210479736328, "std": 24.613697052001953, "min": -8.015777587890625, "p10": 2.3450508117675786, "median": 25.066513061523438, "p90": 66.46574211120607, "max": 102.83447265625, "pos_frac": 0.921875, "sample": [32.96112060546875, 54.40960693359375, 14.789487838745117, 25.13549041748047, 2.105010986328125, 13.685523986816406, 11.4080810546875, 26.16834259033203, 40.213165283203125, -8.015777587890625, 38.465667724609375, 12.043441772460938, 55.48936462402344, 25.358596801757812, 19.13330078125, 10.579988479614258, 7.950950622558594, 94.11429595947266, 15.634185791015625, 30.035667419433594, -2.2073974609375, 74.8040771484375, 35.8939208984375, 44.75616455078125, 51.36126708984375, 102.83447265625, 63.34990310668945, 43.03892517089844, 20.844329833984375, 17.3773193359375, -1.1306610107421875, 18.1658935546875, 7.2495574951171875, 19.21917724609375, 2.9051437377929688, 13.730295181274414, 3.5580368041992188, 46.248626708984375, 24.997535705566406, 8.48065185546875, 17.893829345703125, 18.14780044555664, 38.51807403564453, 50.27299880981445, 11.104843139648438, 49.28810119628906, 44.01164245605469, 54.815879821777344, 72.31413269042969, 33.39410400390625, 7.203102111816406, 15.36578369140625, 39.94493865966797, -1.2570533752441406, 72.20927429199219, 11.704795837402344, 68.83366394042969, 26.975040435791016, 0.24353790283203125, 55.335655212402344, 67.80110168457031, 12.516735076904297, 27.956588745117188, -1.5799694061279297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000292.npy"} +{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 24.17245864868164, "std": 24.482250213623047, "min": -23.84355926513672, "p10": -0.8667945861816395, "median": 16.647586822509766, "p90": 60.89103240966798, "max": 91.95664978027344, "pos_frac": 0.890625, "sample": [11.172674179077148, 17.4056396484375, 13.55000114440918, 2.4453811645507812, 36.71217346191406, 23.33987045288086, 41.75286865234375, 28.382850646972656, 67.6089096069336, 6.463010787963867, 31.04344940185547, 2.5356903076171875, 13.922916412353516, -1.3410148620605469, 68.95881652832031, 1.310546875, 6.369232177734375, 57.14690399169922, -7.8769989013671875, 29.66699981689453, 43.52940368652344, -23.84355926513672, 34.24149703979492, 0.23971939086914062, 3.541238784790039, 8.457862854003906, -2.469390869140625, 9.847904205322266, 15.321674346923828, 53.34983825683594, 62.29582214355469, 21.25177001953125, 29.826765060424805, 30.183517456054688, 44.68673324584961, 7.094099044799805, 13.699211120605469, 28.18069076538086, 22.773353576660156, 8.874666213989258, 15.889533996582031, 14.021049499511719, 10.170913696289062, 37.828582763671875, 42.6539306640625, 57.613189697265625, 34.977142333984375, 76.08396911621094, 21.939098358154297, -4.5003814697265625, 12.345359802246094, -2.9459266662597656, 2.1090240478515625, 83.29667663574219, 91.95664978027344, 18.659786224365234, 11.209770202636719, 39.19528579711914, 7.24195671081543, 10.21429443359375, 5.3245849609375, 39.50017547607422, 71.90665435791016, -11.306682586669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000293.npy"} +{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 27.206314086914062, "std": 26.069358825683594, "min": -9.712003707885742, "p10": -2.123927307128905, "median": 22.06829261779785, "p90": 63.07852020263673, "max": 116.46939086914062, "pos_frac": 0.859375, "sample": [35.44579315185547, 21.308204650878906, 36.5826416015625, 30.96487808227539, 17.066329956054688, 23.53872299194336, -0.7746505737304688, 18.05683135986328, 2.1463394165039062, 56.08283996582031, -1.1758575439453125, 13.100601196289062, 4.903507232666016, 60.76416015625, -3.1717376708984375, 15.125541687011719, 23.078781127929688, 75.33441162109375, 18.504596710205078, 34.24360656738281, 72.71800231933594, 0.3310089111328125, 18.423828125, 13.807138442993164, 90.86468505859375, 47.536285400390625, 31.136917114257812, 8.166162490844727, 36.74768829345703, -9.650581359863281, -5.8706512451171875, 58.70075988769531, 33.30252456665039, 79.142333984375, -9.712003707885742, 1.1448326110839844, 116.46939086914062, 22.578487396240234, 24.90369415283203, 20.709144592285156, -2.530242919921875, 18.719268798828125, 43.419410705566406, 56.971824645996094, 52.35334777832031, 33.22547912597656, 8.45156478881836, 14.921524047851562, 64.07038879394531, 2.936056137084961, 30.209033966064453, 67.90167236328125, 17.64417266845703, 22.259437561035156, 21.877147674560547, 23.766281127929688, 14.39471435546875, 37.25001525878906, 8.341554641723633, -4.55650520324707, 23.85308837890625, -7.6695709228515625, 14.226760864257812, 46.59254455566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000294.npy"} +{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 27.146446228027344, "std": 25.158411026000977, "min": -43.99073791503906, "p10": 1.136109733581543, "median": 22.7630615234375, "p90": 60.93475952148438, "max": 90.85751342773438, "pos_frac": 0.90625, "sample": [9.219537734985352, 20.195531845092773, 68.59066009521484, -3.930723190307617, -11.602890014648438, 55.018638610839844, 16.239582061767578, 26.179244995117188, 33.450103759765625, 17.696243286132812, 57.76734924316406, 14.411445617675781, 22.97795867919922, 47.138282775878906, 47.6109619140625, 69.63851165771484, 29.582046508789062, -5.178752899169922, 3.507793426513672, 9.949760437011719, 53.106266021728516, 15.179697036743164, 33.14961242675781, 14.927227020263672, 1.2849102020263672, 4.80657958984375, 28.048324584960938, 1.0723381042480469, 53.271728515625, 17.425628662109375, 25.008453369140625, 1.9947052001953125, -6.546154022216797, -43.99073791503906, 38.16424560546875, 69.76380920410156, 2.5818405151367188, 37.587059020996094, 19.641563415527344, 61.31636047363281, 50.92680740356445, 22.555313110351562, 32.593170166015625, 1.455535888671875, 4.848270416259766, 10.726303100585938, 46.74415588378906, 39.88587951660156, 38.89234924316406, 62.88203430175781, 22.28406524658203, 21.146514892578125, 22.646270751953125, 90.85751342773438, 45.59608459472656, 60.04435729980469, 25.482738494873047, 57.75726318359375, 22.879852294921875, 11.191726684570312, -4.237005233764648, 16.120561599731445, 76.23037719726562, 1.6075897216796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000295.npy"} +{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 23.47943878173828, "std": 26.585098266601562, "min": -24.808998107910156, "p10": -4.5889438629150385, "median": 18.705669403076172, "p90": 59.79341201782227, "max": 101.13671875, "pos_frac": 0.8125, "sample": [-3.862964630126953, 19.564125061035156, 18.526885986328125, 101.13671875, -12.652740478515625, 42.69024658203125, 4.30328369140625, -3.0855255126953125, 3.281463623046875, -24.808998107910156, 33.09466552734375, 2.8697357177734375, 11.800399780273438, 42.71415710449219, 15.462570190429688, -5.8051300048828125, 1.6688556671142578, 43.000640869140625, 79.15245056152344, 20.61474609375, 21.82086181640625, 4.052177429199219, -16.801406860351562, 23.788516998291016, 31.17620086669922, 60.16436767578125, -0.342987060546875, 8.682052612304688, 6.066123962402344, 12.864627838134766, 58.92784881591797, 34.927406311035156, 26.929813385009766, 35.645545959472656, -0.2759552001953125, 51.56201934814453, 56.22581100463867, -4.900077819824219, -21.474721908569336, 54.37364959716797, 12.794269561767578, 62.32050323486328, -7.006626129150391, 11.742948532104492, 73.49020385742188, 21.49262809753418, 35.491943359375, 37.60576629638672, 4.728324890136719, 15.180276870727539, 17.784286499023438, 7.6967620849609375, 12.726577758789062, 73.1053466796875, 32.335601806640625, 19.6021728515625, 17.139205932617188, 84.02251434326172, 51.76366424560547, 5.834882736206055, 37.17488098144531, 18.88445281982422, -3.06298828125, 26.75897216796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000296.npy"} +{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 28.571863174438477, "std": 23.788049697875977, "min": -8.700481414794922, "p10": -0.4197135925292954, "median": 21.970603942871094, "p90": 69.00554275512697, "max": 86.12737274169922, "pos_frac": 0.890625, "sample": [20.454498291015625, 20.78729248046875, 64.17095947265625, 63.579017639160156, 27.87027359008789, 20.740625381469727, 63.68324279785156, 44.90374755859375, 13.196653366088867, -8.700481414794922, 24.04789924621582, 49.7204475402832, 45.566612243652344, 32.847564697265625, 71.34536743164062, 15.038175582885742, 18.493667602539062, 23.654632568359375, 20.482940673828125, -2.426698684692383, 9.315017700195312, 27.48661994934082, 12.021329879760742, 11.723356246948242, 74.00090026855469, -4.2654571533203125, 74.57288360595703, 6.418479919433594, 42.083892822265625, 21.447235107421875, 32.63086700439453, 37.70554733276367, 39.674346923828125, 41.31926345825195, 22.493972778320312, 12.012752532958984, 46.750091552734375, 14.232254028320312, 82.78335571289062, 25.89190673828125, 22.499244689941406, 20.53563690185547, 18.795059204101562, 41.079742431640625, -5.208806991577148, 0.9890823364257812, 16.660472869873047, -2.8780059814453125, 86.12737274169922, 8.347705841064453, 10.722513198852539, 71.07750701904297, 31.0606689453125, 6.162567138671875, -8.690147399902344, 8.782981872558594, 32.82467269897461, -1.0234832763671875, 73.55662536621094, 51.640350341796875, 11.63876724243164, 21.442413330078125, 20.88235092163086, 31.81888771057129], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000297.npy"} +{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 29.80324935913086, "std": 26.625171661376953, "min": -7.7118377685546875, "p10": 2.6134834289550786, "median": 22.148807525634766, "p90": 70.22258911132813, "max": 129.1201171875, "pos_frac": 0.921875, "sample": [30.474027633666992, 18.299293518066406, 16.646930694580078, 7.9954681396484375, 6.0292510986328125, -0.65997314453125, -0.6682243347167969, 28.31842041015625, 14.306419372558594, 14.517868041992188, 17.014999389648438, 28.906639099121094, 22.62890625, 3.1578216552734375, 8.995635986328125, 19.467567443847656, 17.88994598388672, 39.54578399658203, 14.839591979980469, 17.243064880371094, 71.009765625, -4.321044921875, 41.96996307373047, 4.042552947998047, 73.23273468017578, 71.00227355957031, 12.531272888183594, 25.907684326171875, 56.0238037109375, 74.50736236572266, 14.040502548217773, -5.966104507446289, 13.416595458984375, 3.3498706817626953, 4.0664520263671875, 89.95410919189453, 69.77976989746094, 37.70594787597656, 64.80718994140625, 34.05663299560547, 21.42207145690918, 1.6790218353271484, 129.1201171875, 24.8758544921875, 33.95695495605469, 22.35076904296875, 38.80012512207031, 58.09370040893555, 21.948505401611328, 16.72248077392578, 12.9683837890625, 58.16387939453125, 70.41236877441406, 2.3801956176757812, 57.726318359375, 36.777801513671875, 39.230918884277344, 53.17778015136719, -7.7118377685546875, 17.1466064453125, 12.422698974609375, 28.3260498046875, 22.349109649658203, 59.001373291015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000298.npy"} +{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 31.181495666503906, "std": 28.80525779724121, "min": -16.721725463867188, "p10": -0.9073417663574218, "median": 28.28982162475586, "p90": 71.31781692504886, "max": 108.98564147949219, "pos_frac": 0.875, "sample": [-6.166229248046875, -6.36602783203125, -0.925750732421875, 33.79985809326172, 5.968650817871094, -6.007038116455078, 49.066017150878906, 30.0233154296875, 2.653942108154297, -8.733501434326172, 75.84851837158203, 44.87995147705078, 108.98564147949219, 62.537925720214844, 34.64670944213867, 107.93438720703125, -16.721725463867188, 34.379539489746094, 18.676801681518555, 21.985794067382812, 7.091526031494141, 50.558074951171875, -0.8643875122070312, 34.89929962158203, 17.635643005371094, 24.514495849609375, 75.08062744140625, 44.37410354614258, 5.807628631591797, 3.5609207153320312, 13.722715377807617, 22.16851043701172, 43.45576477050781, 55.828819274902344, 38.648956298828125, 82.4830551147461, 25.184860229492188, 59.01324462890625, 18.110061645507812, 19.09740447998047, 15.990898132324219, 1.4248886108398438, 41.72767639160156, 5.301490783691406, 60.055015563964844, 17.27239227294922, -6.360467910766602, 79.99407958984375, 38.619232177734375, 56.43292236328125, 6.9226531982421875, 0.4398040771484375, 28.319374084472656, 50.774749755859375, 26.41793441772461, 33.75963592529297, 28.260269165039062, 103.16999816894531, 41.14364242553711, 44.26061248779297, 3.024078369140625, 33.16710662841797, 13.127321243286133, 45.53227233886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000299.npy"} +{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 29.6212158203125, "std": 29.87994956970215, "min": -30.886730194091797, "p10": -8.315813827514646, "median": 26.344436645507812, "p90": 76.24153747558594, "max": 90.61348724365234, "pos_frac": 0.828125, "sample": [9.7535400390625, 54.41740417480469, 52.451744079589844, 60.313720703125, 9.361114501953125, -14.52752685546875, 14.094718933105469, 76.30490112304688, 48.52039337158203, -10.196155548095703, -30.886730194091797, 16.479782104492188, 55.28107833862305, 28.158721923828125, 18.140342712402344, 29.60742950439453, 14.748222351074219, 17.357009887695312, 4.171756744384766, 6.480701446533203, 1.7303009033203125, 38.44371795654297, 22.18170166015625, -1.8777084350585938, 19.56041717529297, 90.61348724365234, -14.565216064453125, 14.126739501953125, 24.5301513671875, 13.955047607421875, 29.828100204467773, 72.61648559570312, 33.852195739746094, -9.398284912109375, 47.88612365722656, -9.345268249511719, 74.8224868774414, 36.787315368652344, 87.15936279296875, 43.303436279296875, 22.063365936279297, 44.688507080078125, 44.81022644042969, 47.572601318359375, -5.913753509521484, 38.69279861450195, 29.979480743408203, 20.432815551757812, 76.09368896484375, 81.75843048095703, 79.16168212890625, -27.566497802734375, 10.90542984008789, -0.7616062164306641, 6.361249923706055, 34.78767395019531, 49.51648712158203, 17.16912841796875, 78.0526123046875, 79.1180419921875, -5.7021484375, 44.6566047668457, 51.26496124267578, 2.3733367919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000300.npy"} +{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 31.020545959472656, "std": 25.941102981567383, "min": -6.433019638061523, "p10": 1.8240493774414066, "median": 27.818946838378906, "p90": 61.87804260253906, "max": 105.52278900146484, "pos_frac": 0.921875, "sample": [14.442977905273438, 56.309478759765625, 23.758583068847656, 71.47051239013672, 42.81208801269531, 34.769317626953125, 86.7696533203125, 19.29572296142578, 41.17101287841797, -6.433019638061523, 59.0496826171875, 61.886383056640625, 43.4080810546875, 29.01318359375, 19.88379669189453, 32.185508728027344, 24.67249298095703, 53.88414001464844, 1.6886749267578125, 31.751480102539062, 105.52278900146484, 0.4216804504394531, 5.507017135620117, 3.8507843017578125, 26.246597290039062, 33.71820068359375, 25.99475860595703, 9.307846069335938, 48.7755126953125, 48.26140594482422, 61.85858154296875, 15.189987182617188, 5.968639373779297, 36.032867431640625, 13.18598747253418, 11.559942245483398, 21.15357208251953, 29.305503845214844, 5.730371475219727, 2.139923095703125, 35.98582458496094, 4.144899368286133, 8.009864807128906, 26.8212890625, 18.228111267089844, 81.176513671875, 34.607933044433594, 28.816604614257812, 58.80279541015625, -6.156318664550781, 73.90831756591797, 54.67992401123047, 43.61678695678711, 32.32378387451172, 48.62139892578125, 22.22611427307129, -3.0548782348632812, -1.8812179565429688, 102.598388671875, 15.987495422363281, -1.2042694091796875, 8.921028137207031, 5.354129791259766, 41.258628845214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000301.npy"} +{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 29.15094566345215, "std": 29.601024627685547, "min": -25.776611328125, "p10": -2.318616485595701, "median": 23.703075408935547, "p90": 65.9661346435547, "max": 134.09747314453125, "pos_frac": 0.875, "sample": [22.360485076904297, 51.32550048828125, 53.563018798828125, 14.807971954345703, 3.684356689453125, 26.229036331176758, 77.89585876464844, 6.005378723144531, 11.839553833007812, 64.70065307617188, 23.226409912109375, 12.966211318969727, 7.615989685058594, -0.18387603759765625, 19.181793212890625, -6.543308258056641, 51.798065185546875, 1.1622276306152344, 50.01708221435547, 42.71928405761719, 10.278045654296875, 32.98939514160156, 36.590171813964844, 29.003456115722656, -9.489952087402344, -25.776611328125, 34.66673278808594, 66.50848388671875, 23.203798294067383, -17.523887634277344, 33.956581115722656, 24.17974090576172, 52.89789581298828, 5.80963134765625, 11.502857208251953, 98.77389526367188, 134.09747314453125, 20.416427612304688, 40.3216552734375, 20.488067626953125, -3.2335052490234375, 82.92277526855469, -15.126617431640625, 49.176666259765625, 31.170372009277344, 45.83287048339844, 9.58419418334961, 92.54571533203125, 10.727832794189453, 18.027542114257812, 24.98740577697754, 38.24488830566406, 16.301185607910156, 24.6669921875, 24.981204986572266, 29.866073608398438, 7.566444396972656, 64.3162612915039, -4.483207702636719, 86.1319580078125, 18.148681640625, 6.6305084228515625, 27.320026397705078, 22.088701248168945], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000302.npy"} +{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 28.024972915649414, "std": 28.981969833374023, "min": -20.75140380859375, "p10": -6.834532737731931, "median": 22.473968505859375, "p90": 74.78558654785157, "max": 110.96018981933594, "pos_frac": 0.84375, "sample": [38.60797119140625, 1.6645851135253906, 1.0915756225585938, 79.94833374023438, -9.965784072875977, 22.355228424072266, 51.35832214355469, 82.13778686523438, 1.1594047546386719, 2.8379898071289062, 49.674957275390625, 5.701934814453125, 51.13279724121094, 8.437667846679688, 22.592708587646484, 8.333736419677734, 10.103233337402344, 16.421228408813477, -0.9557323455810547, 51.489593505859375, -20.75140380859375, 26.94344711303711, 15.560226440429688, 27.84991455078125, 58.28864669799805, 73.61796569824219, -4.002992630004883, 44.78886795043945, -3.5442123413085938, 19.845420837402344, -8.048049926757812, 37.24702453613281, 82.35858154296875, 19.243886947631836, 61.31201171875, 7.776031494140625, -19.656349182128906, 36.834014892578125, 41.36358642578125, 110.96018981933594, 70.66751098632812, 17.482772827148438, 31.790855407714844, 77.73860931396484, 13.17401123046875, 27.29400634765625, 36.397010803222656, 80.43003845214844, 18.82524871826172, -10.414897918701172, 32.866729736328125, 13.801910400390625, 25.952835083007812, 12.473743438720703, 25.91399574279785, 75.28599548339844, -9.26547622680664, -9.524429321289062, 21.520244598388672, 3.6182994842529297, 34.861572265625, 40.020538330078125, 14.947212219238281, 45.62553405761719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000303.npy"} +{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 28.358028411865234, "std": 20.787368774414062, "min": -28.421630859375, "p10": 2.204212951660157, "median": 29.306751251220703, "p90": 58.225981140136724, "max": 63.08823013305664, "pos_frac": 0.90625, "sample": [-2.1608943939208984, 16.059341430664062, 36.79607391357422, 28.69585418701172, 1.9175567626953125, 12.306526184082031, 12.5426025390625, -11.324371337890625, 29.199195861816406, 61.82666015625, 29.414306640625, 57.45088195800781, 17.350624084472656, 54.52843475341797, 34.21797561645508, 35.99137878417969, 14.396949768066406, 58.55816650390625, 16.315773010253906, 17.880481719970703, 40.575416564941406, 60.97520446777344, 25.689727783203125, 18.45068359375, 50.91389465332031, 49.62862014770508, 63.08823013305664, 7.638631820678711, 35.04579162597656, 25.718700408935547, 22.369001388549805, 40.196868896484375, 23.952674865722656, 40.511268615722656, 11.259429931640625, -0.9953155517578125, 32.003082275390625, 11.372543334960938, 30.93030548095703, 31.516666412353516, -28.421630859375, 40.210227966308594, 19.12841796875, 2.873077392578125, 42.93415069580078, -4.989377975463867, 3.34429931640625, 11.748638153076172, 62.721229553222656, 31.814491271972656, 37.0115966796875, -12.627660751342773, 15.14410400390625, 58.5582275390625, 54.654335021972656, 10.269073486328125, 59.461891174316406, 26.639915466308594, 54.857940673828125, 29.111175537109375, 37.93150329589844, 50.4117431640625, 34.78776550292969, 34.53376770019531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000304.npy"} +{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 28.763023376464844, "std": 26.594858169555664, "min": -8.2049560546875, "p10": 2.0166198730468756, "median": 22.559646606445312, "p90": 59.107295227050784, "max": 141.55838012695312, "pos_frac": 0.9375, "sample": [46.4051513671875, 45.969703674316406, 19.119129180908203, 28.92672348022461, 36.68791580200195, 53.7015380859375, 37.46549987792969, 21.14620018005371, 44.5924072265625, 79.33416748046875, -1.0234756469726562, 11.310127258300781, 30.32530975341797, 16.589508056640625, 28.57964324951172, 56.211395263671875, 34.086334228515625, 52.68321990966797, 43.17139434814453, 5.929450988769531, 141.55838012695312, 50.012451171875, 23.989017486572266, 17.16907501220703, 66.74371337890625, 8.41700553894043, 16.09552574157715, 28.812515258789062, 49.993858337402344, 13.142745971679688, 8.280609130859375, 65.62576293945312, 3.8329925537109375, 19.870168685913086, 33.3248291015625, 3.9044189453125, 59.06744384765625, 25.223068237304688, 17.570703506469727, 13.694847106933594, 12.067113876342773, 0.5666580200195312, 23.02783203125, 7.330604553222656, 50.86962890625, 59.12437438964844, 98.41322326660156, 2.5884246826171875, -2.5735549926757812, 25.856735229492188, 15.05853271484375, 62.00867462158203, 35.612030029296875, 18.074050903320312, -8.2049560546875, 0.085968017578125, 12.615104675292969, -3.3809814453125, 22.091461181640625, 1.7715606689453125, 3.8770980834960938, 37.150482177734375, 3.6214599609375, 5.641382217407227], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000305.npy"} +{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 27.875524520874023, "std": 23.330718994140625, "min": -33.739402770996094, "p10": 3.3302150726318365, "median": 26.619155883789062, "p90": 61.411904907226564, "max": 75.01348876953125, "pos_frac": 0.921875, "sample": [12.371959686279297, -10.141162872314453, 24.720073699951172, 17.77557373046875, -12.7041015625, 11.747726440429688, 67.87069702148438, 27.300933837890625, 9.9638671875, 18.114601135253906, 42.51029968261719, 47.688941955566406, -5.174125671386719, 14.44891357421875, 31.904212951660156, 9.755699157714844, 17.46493148803711, 55.86084747314453, 50.25428771972656, 13.719390869140625, 5.5173187255859375, 34.743125915527344, -3.400115966796875, 32.830116271972656, 25.9373779296875, 11.955366134643555, 11.23513412475586, 4.547176361083984, 33.292579650878906, 67.79154968261719, 7.7909698486328125, 1.1313362121582031, 61.68994140625, 34.023406982421875, 12.637947082519531, 56.586341857910156, 6.11945915222168, 3.9508514404296875, 25.338205337524414, 35.593223571777344, -33.739402770996094, 73.02885437011719, 34.547760009765625, 6.4836883544921875, 3.064228057861328, 41.64197540283203, 13.736213684082031, 42.84180450439453, 60.763153076171875, 67.53675842285156, 52.937835693359375, 48.52122497558594, 28.009319305419922, 31.053003311157227, 28.0289306640625, 13.494464874267578, 25.27771759033203, 17.59259033203125, 36.62446594238281, 56.57235336303711, 75.01348876953125, 53.572235107421875, 30.328781127929688, 62.33715057373047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000306.npy"} +{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 25.549152374267578, "std": 24.11737060546875, "min": -12.283903121948242, "p10": -0.2011135101318357, "median": 21.4307918548584, "p90": 54.73014450073242, "max": 98.60552978515625, "pos_frac": 0.890625, "sample": [49.3238525390625, 13.175361633300781, 5.9891204833984375, 17.875045776367188, 22.64993667602539, 38.72811508178711, -11.00836181640625, 4.2374420166015625, 6.1958465576171875, 94.39227294921875, 22.66642189025879, 14.675209045410156, 58.00956344604492, 11.209770202636719, 50.733551025390625, -10.874687194824219, 54.41838836669922, 3.9272842407226562, 40.566314697265625, -6.3669281005859375, 21.19892120361328, 18.08367156982422, 21.422775268554688, 54.86375427246094, 98.60552978515625, 21.573959350585938, 21.43880844116211, 10.165044784545898, 40.70869827270508, 37.985687255859375, 9.590730667114258, 46.26115417480469, 17.344297409057617, 18.70014190673828, 0.0183868408203125, 12.722713470458984, 23.654495239257812, 7.396736145019531, 59.03895568847656, 33.94615173339844, 21.57074737548828, 36.167694091796875, -0.2951850891113281, 24.319580078125, 38.85499572753906, 15.687156677246094, 39.20606994628906, 46.15900421142578, 0.7220745086669922, 22.68468475341797, 40.87384796142578, 0.4033927917480469, 68.60494995117188, 28.44025421142578, 52.131927490234375, 83.70606994628906, -9.954185485839844, -2.8973388671875, 15.630516052246094, 15.714202880859375, 21.687705993652344, 16.614654541015625, 16.152633666992188, -12.283903121948242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000307.npy"} +{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 28.892669677734375, "std": 26.63459014892578, "min": -25.335601806640625, "p10": 2.3698194503784196, "median": 24.25887680053711, "p90": 59.121273040771484, "max": 107.04135131835938, "pos_frac": 0.921875, "sample": [40.16171646118164, 68.34378051757812, 4.141025543212891, 26.13103485107422, 11.074577331542969, 88.77017211914062, -25.335601806640625, 17.617034912109375, 57.654876708984375, 7.056310653686523, 38.78302001953125, 20.011905670166016, 16.416027069091797, 17.42414093017578, 50.81072998046875, 59.350830078125, -18.308101654052734, 38.71882629394531, 10.168853759765625, -1.2668991088867188, 10.850341796875, 107.04135131835938, 42.55059814453125, 71.11024475097656, 13.872451782226562, 26.980697631835938, 21.76769256591797, 0.13568687438964844, 35.184574127197266, 27.119600296020508, 31.946868896484375, 23.62749481201172, 30.226394653320312, 39.41326141357422, 36.72636413574219, 55.144500732421875, 37.91107177734375, 13.132621765136719, 24.8902587890625, 8.074151992797852, 11.83659553527832, 54.90739440917969, 18.496002197265625, 34.168243408203125, 27.79478645324707, 104.23809814453125, 18.179649353027344, 26.990455627441406, 10.285850524902344, 52.7344970703125, 85.38011932373047, 50.21751403808594, 6.951835632324219, 17.563583374023438, -7.805532455444336, 1.6107311248779297, 58.58563995361328, 44.46343231201172, 14.256149291992188, -3.0261688232421875, 12.746200561523438, 9.696701049804688, 8.276798248291016, 5.151756286621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000308.npy"} +{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 28.214529037475586, "std": 24.55332374572754, "min": -16.1966552734375, "p10": -1.191508865356445, "median": 26.839466094970703, "p90": 60.88320617675782, "max": 96.36068725585938, "pos_frac": 0.859375, "sample": [1.8738899230957031, 13.008724212646484, 7.969018936157227, -12.125396728515625, 17.897300720214844, 53.950927734375, 22.369970321655273, -0.24370193481445312, 50.78529357910156, 23.3631591796875, 52.93495178222656, 40.249176025390625, 23.195301055908203, 64.36677551269531, 15.635528564453125, 12.780447006225586, -10.487789154052734, 60.487213134765625, 51.448883056640625, 38.941795349121094, 39.395896911621094, -16.1966552734375, 43.991188049316406, -3.7753143310546875, 33.362091064453125, 7.5409698486328125, 54.2818717956543, 32.88995361328125, -0.899200439453125, -1.3167839050292969, 0.8802566528320312, 63.85858154296875, 29.024215698242188, 4.4117279052734375, 32.05254364013672, 61.117401123046875, 96.36068725585938, 27.84844207763672, 20.081148147583008, -2.041351318359375, 5.546024322509766, 7.312963485717773, 16.072711944580078, 76.28123474121094, 13.360870361328125, 32.746910095214844, 61.05291748046875, 8.017974853515625, 45.255043029785156, 11.302745819091797, 56.05132293701172, -1.4135513305664062, 34.64064025878906, 56.934326171875, 6.763917922973633, 27.917842864990234, 36.10658264160156, 85.86305236816406, 22.735733032226562, 32.38488006591797, 34.72856140136719, 23.641584396362305, 35.32598876953125, 25.830490112304688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000309.npy"} +{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 24.480121612548828, "std": 20.728391647338867, "min": -4.174957275390625, "p10": 3.291192626953125, "median": 18.018381118774414, "p90": 56.42516708374025, "max": 81.45247650146484, "pos_frac": 0.953125, "sample": [-2.370098114013672, 26.74712371826172, 16.526084899902344, 17.45014190673828, 32.81314468383789, 49.213294982910156, 47.39674758911133, 8.645380020141602, 6.88279914855957, 28.31024169921875, 0.2938957214355469, 30.08026123046875, 37.21336364746094, 18.586620330810547, 3.9571094512939453, 4.254646301269531, 19.668298721313477, 41.0228271484375, 13.081886291503906, 44.9711799621582, 23.134357452392578, -2.069681167602539, 78.8306884765625, 3.285369873046875, 9.2740478515625, 39.50617980957031, 53.714691162109375, 23.731914520263672, 25.054758071899414, 15.304039001464844, 8.99945068359375, 22.832115173339844, 8.026782989501953, 3.2717018127441406, 71.40925598144531, 15.754009246826172, 26.018142700195312, 11.824193954467773, 10.768798828125, -4.174957275390625, 16.241973876953125, 27.140045166015625, 48.16236877441406, 58.47410583496094, 17.42645263671875, 62.16136169433594, 81.45247650146484, 0.25743865966796875, 58.88336181640625, 3.304779052734375, 19.618080139160156, 7.012044906616211, 15.633285522460938, 17.057899475097656, 12.41839599609375, 4.4396514892578125, 3.420166015625, 39.01311492919922, 48.22784423828125, 13.462757110595703, 9.231193542480469, 32.55516815185547, 57.58679962158203, 24.3062801361084], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000310.npy"} +{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 34.34797286987305, "std": 25.421321868896484, "min": -20.857208251953125, "p10": 5.890789985656738, "median": 30.237972259521484, "p90": 72.03687438964845, "max": 103.04837036132812, "pos_frac": 0.96875, "sample": [21.84406280517578, 54.49542236328125, 60.832237243652344, 21.93341827392578, 1.4692020416259766, 24.17034912109375, 13.413280487060547, 47.70599365234375, 72.8861083984375, 58.83903503417969, 44.74891662597656, 13.998626708984375, 44.057918548583984, 80.66146850585938, 10.906436920166016, 13.959381103515625, 37.956298828125, 23.299463272094727, 21.258636474609375, 59.495208740234375, 32.979644775390625, 59.527198791503906, 70.05532836914062, 22.84215545654297, 9.374076843261719, 35.30080032348633, 31.40240478515625, 15.977157592773438, 30.629066467285156, 52.00702667236328, 9.252761840820312, 32.07847213745117, 26.7469482421875, 51.51478576660156, 55.1357421875, -20.857208251953125, 6.0781707763671875, 103.04837036132812, -2.5849685668945312, 47.79966735839844, 15.554351806640625, 12.688423156738281, 50.49293518066406, 24.951189041137695, 74.03489685058594, 29.700042724609375, 2.332500457763672, 5.810483932495117, 50.77480697631836, 51.31095886230469, 4.725856781005859, 81.14859008789062, 28.798751831054688, 3.0349960327148438, 13.446502685546875, 86.86341857910156, 38.508949279785156, 29.846878051757812, 9.462318420410156, 9.821556091308594, 48.566650390625, 49.03948974609375, 6.448600769042969, 74.6680679321289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000311.npy"} +{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 29.852706909179688, "std": 26.55569839477539, "min": -13.208303451538086, "p10": -3.887220764160156, "median": 27.987163543701172, "p90": 61.9832332611084, "max": 104.31178283691406, "pos_frac": 0.84375, "sample": [24.432857513427734, 92.80812072753906, 60.294891357421875, 29.174545288085938, 29.653398513793945, 11.992408752441406, 24.232967376708984, 6.713933944702148, 43.57597351074219, 61.208473205566406, -9.498716354370117, 98.43940734863281, 93.9454345703125, 53.66712951660156, 50.425323486328125, 20.313079833984375, -9.406423568725586, 14.782821655273438, 46.46839904785156, 70.01056671142578, 22.15833282470703, 13.280899047851562, 34.50608825683594, 35.03790283203125, 22.957305908203125, -13.208303451538086, 31.83612823486328, 52.8565673828125, 32.793296813964844, 16.773086547851562, 15.243721008300781, -3.957427978515625, 34.487205505371094, 23.038299560546875, 14.093772888183594, 22.220535278320312, 35.37810516357422, 62.31527328491211, 29.461669921875, 36.15615463256836, 65.5171127319336, 9.711271286010742, -4.010869979858398, 17.537151336669922, 30.40557289123535, 29.19062042236328, -6.542713165283203, 104.31178283691406, -3.7234039306640625, 48.9134521484375, 15.47861099243164, -3.2704010009765625, 12.54412841796875, 10.809253692626953, 10.556640625, 44.36811828613281, 42.854915618896484, -1.477762222290039, -7.27276611328125, 27.77273941040039, 52.31461715698242, 16.228622436523438, 28.201587677001953, 39.49172592163086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000312.npy"} +{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 25.848873138427734, "std": 26.925546646118164, "min": -26.848411560058594, "p10": -3.0759815216064452, "median": 20.964481353759766, "p90": 64.723885345459, "max": 94.69051361083984, "pos_frac": 0.84375, "sample": [43.433746337890625, 26.28232192993164, -1.0288162231445312, 30.998672485351562, 22.17395782470703, 27.080703735351562, 53.242061614990234, 44.51271057128906, 61.56652069091797, 9.628791809082031, 7.672473907470703, -0.92803955078125, 9.1312255859375, 15.36224365234375, 5.037021636962891, -3.8889541625976562, 23.821273803710938, 38.123512268066406, 22.81167984008789, 31.71197509765625, 25.45691680908203, 66.07704162597656, -26.848411560058594, 1.8635425567626953, 17.301681518554688, 69.11832427978516, 35.4950065612793, 10.612689971923828, 21.595481872558594, 88.24822998046875, 49.619361877441406, 38.665496826171875, 0.6450099945068359, 25.280258178710938, -7.813575744628906, 4.311017990112305, -2.8887062072753906, -7.542732238769531, 17.145418167114258, 44.656005859375, 1.2038192749023438, 15.867927551269531, 8.853580474853516, 20.333480834960938, 79.57989501953125, 33.776920318603516, -4.185600280761719, 45.683677673339844, 78.23881530761719, -3.1562423706054688, 60.78941345214844, 36.784889221191406, 14.0399169921875, 2.1119651794433594, -7.864034652709961, 89.41049194335938, 45.62042236328125, 10.949516296386719, 0.7130393981933594, 15.578643798828125, 10.837333679199219, 52.662940979003906, 14.063362121582031, 94.69051361083984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000313.npy"} +{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 25.937664031982422, "std": 25.701616287231445, "min": -14.2586669921875, "p10": -1.3560607910156244, "median": 19.082908630371094, "p90": 56.271993255615236, "max": 108.0374526977539, "pos_frac": 0.859375, "sample": [90.92593383789062, -12.970504760742188, 7.724567413330078, 0.8988037109375, 2.7501373291015625, 68.49433135986328, 12.380241394042969, 37.03767395019531, 9.328376770019531, 10.011688232421875, 47.846683502197266, -1.6055145263671875, 15.522529602050781, 32.74699401855469, 20.04317855834961, -14.2586669921875, 0.2081451416015625, 32.11212921142578, 55.36772155761719, 77.44305419921875, 13.186660766601562, -2.333017349243164, 5.4530487060546875, 7.53118896484375, 19.131332397460938, -5.512920379638672, 2.5932159423828125, 7.098682403564453, 14.605415344238281, 56.250572204589844, 56.35693359375, 13.220252990722656, 44.5299072265625, 49.53173828125, 46.663909912109375, -2.9180564880371094, 25.294158935546875, 57.655845642089844, 55.21788024902344, 25.141510009765625, 39.57695770263672, 53.73809051513672, 108.0374526977539, 42.588951110839844, 8.73440170288086, 16.784263610839844, 15.51641845703125, 45.49110412597656, 0.349029541015625, 10.293384552001953, 28.037200927734375, -0.7740020751953125, 47.563720703125, 46.343013763427734, 49.76231384277344, 19.255172729492188, 12.114595413208008, -9.509376525878906, 34.36285400390625, 12.439430236816406, 23.36003875732422, -0.0760040283203125, 56.28117370605469, 19.03448486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000314.npy"} +{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 24.5708065032959, "std": 22.940462112426758, "min": -10.803165435791016, "p10": 2.612305450439453, "median": 20.272245407104492, "p90": 57.58579978942871, "max": 107.55924987792969, "pos_frac": 0.90625, "sample": [42.790496826171875, 24.402677536010742, 16.397323608398438, 16.008331298828125, -1.3838062286376953, 29.732559204101562, 4.791341781616211, 18.13226318359375, -0.6799774169921875, 2.6231307983398438, 42.889251708984375, -1.152547836303711, -10.803165435791016, 16.172393798828125, 25.22570037841797, 63.123687744140625, 5.401653289794922, 29.14960289001465, 3.5836563110351562, 28.074310302734375, 25.984375, 59.594703674316406, 16.399024963378906, 2.8324508666992188, 19.785675048828125, 13.02730941772461, 53.64527893066406, 11.09620475769043, 58.00605773925781, -7.967998504638672, 78.2247314453125, 22.474288940429688, 2.607666015625, 20.75881576538086, 30.839305877685547, 53.563629150390625, 24.966461181640625, 46.713653564453125, 58.092002868652344, 6.863246917724609, 26.862396240234375, 30.703460693359375, 107.55924987792969, 10.692008972167969, 38.35943603515625, 14.109893798828125, -8.766807556152344, 3.949148178100586, 7.7919158935546875, 24.917694091796875, 12.88546371459961, 32.49661636352539, 83.7336196899414, 16.83971405029297, 22.320518493652344, 10.694948196411133, 56.60519790649414, 12.52935791015625, 34.11724853515625, 13.955429077148438, 20.851806640625, 14.768983840942383, 6.942331314086914, 25.626129150390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000315.npy"} +{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 29.87819480895996, "std": 26.384305953979492, "min": -14.510581970214844, "p10": 1.7542934417724616, "median": 24.184188842773438, "p90": 69.4810287475586, "max": 107.33357238769531, "pos_frac": 0.9375, "sample": [30.306228637695312, 15.21200942993164, 50.39225769042969, 21.096349716186523, 21.98113250732422, 67.79127502441406, 30.677146911621094, 21.68988800048828, 34.583984375, 36.18189239501953, 30.707130432128906, 37.88380432128906, 1.4422073364257812, 2.482494354248047, 19.13149642944336, 44.85307312011719, 45.10593032836914, 31.341449737548828, -2.974590301513672, 20.335845947265625, 8.763587951660156, 10.68526840209961, 105.70674896240234, 19.582351684570312, 70.70774841308594, 35.01176452636719, 16.742633819580078, 76.61592102050781, 33.5443115234375, 82.1336441040039, 0.34105873107910156, 10.413612365722656, 6.7230072021484375, 33.54690170288086, 21.082042694091797, 57.977325439453125, 21.36944580078125, 5.961631774902344, 69.94007873535156, -1.0226669311523438, 96.48037719726562, 26.387245178222656, 8.3270263671875, 45.70989990234375, 21.44183921813965, 30.731307983398438, 1.3688278198242188, 3.959453582763672, 107.33357238769531, 36.148399353027344, 68.409912109375, 18.876876831054688, 29.01045036315918, 17.197446823120117, 43.28068542480469, 3.6700897216796875, -3.7016220092773438, 26.978805541992188, 9.68243408203125, -14.510581970214844, 30.571212768554688, 7.197322845458984, 20.765037536621094, 30.84100341796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000316.npy"} +{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 33.848411560058594, "std": 25.116172790527344, "min": -9.27984619140625, "p10": 4.161865997314454, "median": 31.784591674804688, "p90": 67.57322387695314, "max": 110.87234497070312, "pos_frac": 0.90625, "sample": [47.08454132080078, 21.879920959472656, 25.936813354492188, 45.443603515625, 87.21343994140625, -9.27984619140625, 59.515785217285156, 47.64729309082031, -6.143218994140625, 22.48065948486328, 110.87234497070312, 24.297767639160156, 41.221099853515625, 37.865447998046875, 34.15589904785156, 49.54625701904297, 46.10319519042969, 22.980499267578125, 21.92884063720703, 29.084609985351562, 3.982086181640625, 31.753326416015625, 32.378013610839844, 72.92808532714844, 8.195018768310547, 77.2530517578125, -2.2520790100097656, 4.581352233886719, 64.28285217285156, 38.63140869140625, 43.49787902832031, 28.602798461914062, 30.043476104736328, 49.733741760253906, 54.192779541015625, 5.9284210205078125, 57.68592071533203, 31.81585693359375, 54.98976135253906, 27.511093139648438, 8.285308837890625, 69.0299301147461, -1.989084243774414, 13.638372421264648, 59.366455078125, 74.65392303466797, 54.223907470703125, -2.8540496826171875, 41.21381759643555, 8.05732536315918, 16.18770408630371, 47.93846893310547, 58.399688720703125, 32.48405456542969, 12.601066589355469, 14.004547119140625, -0.6075553894042969, 16.24443817138672, 48.877830505371094, 16.829544067382812, 8.711334228515625, 15.285507202148438, 11.162513732910156, 68.98338317871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000317.npy"} +{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 30.52468490600586, "std": 25.276248931884766, "min": -15.318862915039062, "p10": -0.0843986511230449, "median": 29.37831974029541, "p90": 67.13362121582031, "max": 101.45472717285156, "pos_frac": 0.890625, "sample": [5.846014022827148, 3.6398468017578125, 82.98585510253906, 55.436279296875, 25.64202880859375, 17.252628326416016, 9.070411682128906, 34.885498046875, 28.314544677734375, 79.15789794921875, 3.1203384399414062, 36.70245361328125, 34.883209228515625, 30.882415771484375, 101.45472717285156, 37.9740104675293, -0.898529052734375, 35.083106994628906, 12.513015747070312, 29.68309211730957, 38.1195068359375, 19.026161193847656, 1.8152389526367188, 55.60626983642578, 71.71247100830078, 56.02276611328125, -1.912363052368164, 2.217395782470703, 30.37057876586914, 29.747798919677734, 20.995529174804688, 37.972591400146484, 32.01885986328125, 90.85189819335938, 16.16950225830078, 37.051631927490234, 52.42458724975586, -3.383686065673828, -15.318862915039062, 65.68103790283203, 71.17741394042969, 20.068870544433594, 8.834918975830078, 25.078147888183594, 56.52978515625, 20.594383239746094, 39.61304473876953, 31.429122924804688, -2.6162796020507812, 5.87030029296875, -2.5297393798828125, 17.355201721191406, 41.39070129394531, 44.133995056152344, 59.491153717041016, 20.44646453857422, 43.43653869628906, 14.06991958618164, 13.699363708496094, 29.07354736328125, -3.317474365234375, 67.75615692138672, 18.942054748535156, 12.234329223632812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000318.npy"} +{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 29.426651000976562, "std": 22.474348068237305, "min": -5.428596496582031, "p10": 3.2250631332397464, "median": 27.32065200805664, "p90": 59.21873931884767, "max": 88.7397232055664, "pos_frac": 0.9375, "sample": [29.130599975585938, 31.605770111083984, 23.207244873046875, 35.343994140625, 45.339134216308594, 25.567001342773438, 53.211395263671875, 14.440956115722656, 42.68336486816406, 17.927772521972656, 6.15814208984375, -3.989654541015625, 45.3232421875, 10.640510559082031, 1.5566482543945312, 23.4840087890625, 16.265472412109375, 44.32232666015625, 63.48253631591797, 29.04975128173828, 27.752365112304688, 27.784820556640625, 9.757797241210938, 37.26665496826172, 45.45460510253906, 3.430665969848633, 17.731468200683594, 60.27104187011719, 11.289794921875, 52.358741760253906, 7.380876541137695, 37.268856048583984, 18.333099365234375, 17.434799194335938, 3.1369476318359375, 21.50420379638672, 37.088783264160156, 23.949886322021484, -4.568794250488281, 28.16476058959961, 78.40472412109375, 88.7397232055664, 84.2524185180664, 18.302045822143555, 32.454742431640625, 4.60203742980957, 50.823524475097656, 36.34919738769531, 46.60293197631836, 69.46749877929688, 84.494384765625, 32.38988494873047, 8.050041198730469, 25.238037109375, -0.8217887878417969, 5.935863494873047, -5.428596496582031, 2.9181175231933594, 11.728553771972656, 26.888938903808594, 56.76336669921875, 23.411903381347656, 34.66651153564453, 29.53002166748047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000319.npy"} +{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 26.876949310302734, "std": 23.658065795898438, "min": -23.337310791015625, "p10": 5.013620758056641, "median": 20.95814323425293, "p90": 62.085929870605476, "max": 84.99072265625, "pos_frac": 0.9375, "sample": [13.85052490234375, 30.680660247802734, 64.92999267578125, 7.749351501464844, 53.27806854248047, 7.619544982910156, 50.93000793457031, 17.725135803222656, 80.35874938964844, 12.232368469238281, 68.64083099365234, 44.9659423828125, 1.815481185913086, 30.22784423828125, 44.650901794433594, 62.66282653808594, 24.167572021484375, 6.438079833984375, 12.158157348632812, 2.1563873291015625, 12.651542663574219, 51.336456298828125, 13.113504409790039, 12.159828186035156, 23.567115783691406, 20.430458068847656, 37.0772705078125, 14.566577911376953, -10.918922424316406, 24.8883056640625, -2.29425048828125, 44.13636016845703, 5.0120391845703125, 53.80465316772461, 43.91423034667969, 20.68453598022461, 5.78997802734375, 31.36962890625, 83.28573608398438, 15.73968505859375, 37.12305450439453, -5.327606201171875, 12.7069091796875, 6.295204162597656, 84.99072265625, 11.319847106933594, 39.14141845703125, 36.025238037109375, 8.042064666748047, 5.6092987060546875, 18.267929077148438, 16.2318115234375, 28.669815063476562, 70.3726577758789, 36.48542785644531, 45.97138977050781, 60.739837646484375, 6.00273323059082, 28.278427124023438, 5.017311096191406, 5.0442657470703125, 27.66936492919922, 21.23175048828125, -23.337310791015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000320.npy"} +{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 26.116397857666016, "std": 28.0751953125, "min": -26.505264282226562, "p10": -4.9040220260620115, "median": 23.710575103759766, "p90": 61.31600379943849, "max": 124.41720581054688, "pos_frac": 0.796875, "sample": [62.568267822265625, 33.425323486328125, 48.02311706542969, 28.032073974609375, 2.1521148681640625, 28.46318244934082, 63.732120513916016, -5.041309356689453, 7.473541259765625, 51.20222473144531, 44.20974349975586, 0.36588287353515625, -26.505264282226562, -4.583684921264648, -7.355171203613281, 30.238449096679688, 36.841094970703125, 40.55366516113281, 53.677886962890625, -1.611602783203125, 49.7799072265625, 4.095741271972656, -2.4409332275390625, 33.31596374511719, 49.519256591796875, 26.603477478027344, 30.544326782226562, -13.946226119995117, 20.873882293701172, 16.751510620117188, 19.165069580078125, 86.62210845947266, 5.860595703125, 13.160903930664062, 19.17021942138672, 77.86862182617188, -2.0831451416015625, -12.955965042114258, 124.41720581054688, 31.29737091064453, 4.279624938964844, -2.6043243408203125, 9.152273178100586, 25.51331329345703, 3.6036109924316406, 40.09281921386719, 6.201416015625, -8.661977767944336, 43.80772399902344, 34.562538146972656, 55.50763702392578, 64.8516845703125, 11.459039688110352, -1.1763153076171875, 20.83312225341797, 49.56452941894531, 41.85075378417969, 58.3940544128418, 11.29539680480957, 69.80935668945312, 16.599029541015625, 45.45688247680664, 21.9078369140625, -14.332122802734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000321.npy"} +{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 31.63241958618164, "std": 27.988502502441406, "min": -5.576244354248047, "p10": 1.5635950088500983, "median": 23.287200927734375, "p90": 72.7325424194336, "max": 90.50723266601562, "pos_frac": 0.90625, "sample": [21.989459991455078, 34.6121826171875, 29.728614807128906, 71.908203125, 5.222261428833008, -5.576244354248047, 64.95968627929688, 46.43516540527344, 86.7396240234375, -4.3488922119140625, 1.2514419555664062, 13.269859313964844, 69.46458435058594, 34.14002990722656, 26.511749267578125, 47.86953353881836, 26.320106506347656, 44.93260192871094, 6.6472015380859375, 3.9591827392578125, 90.50723266601562, 31.043060302734375, 17.191261291503906, 65.72241973876953, -0.11495208740234375, 16.506568908691406, 67.54815673828125, 86.81554412841797, -4.408164978027344, 89.44744110107422, 11.683784484863281, 17.7149658203125, 41.744441986083984, 27.053184509277344, 23.761802673339844, 9.66494369506836, 13.926078796386719, 33.98450469970703, 7.6099700927734375, 48.88519287109375, 15.18603515625, 73.08583068847656, 57.35303497314453, 9.58255386352539, 52.6129150390625, 60.60911560058594, 18.431175231933594, -1.5795326232910156, 45.50677490234375, 9.547042846679688, 17.18780517578125, 88.73960876464844, 22.812599182128906, 11.659175872802734, 13.510276794433594, 23.824356079101562, 8.882583618164062, 20.919883728027344, 3.774759292602539, -4.918701171875, 79.50173950195312, 5.855583190917969, 67.77456665039062, 2.291952133178711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000322.npy"} +{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 31.286056518554688, "std": 25.77242660522461, "min": -9.92477035522461, "p10": 2.7031930923461918, "median": 26.151376724243164, "p90": 65.30990753173829, "max": 112.2451171875, "pos_frac": 0.921875, "sample": [66.89202880859375, 50.697357177734375, 60.86676025390625, 10.821159362792969, 84.50501251220703, -9.477958679199219, 41.01545715332031, 52.24720764160156, 7.122570037841797, 8.985160827636719, 56.19587707519531, 9.145271301269531, 9.559776306152344, 44.894561767578125, 0.07949066162109375, 23.878860473632812, 25.22418975830078, 24.15992546081543, 54.14264678955078, 112.2451171875, -0.9130363464355469, 3.1314525604248047, 67.88204193115234, 45.31410217285156, 14.485248565673828, 68.95970916748047, 30.690553665161133, 24.369346618652344, 17.037704467773438, 17.516143798828125, 31.13509750366211, 48.03749084472656, 32.550872802734375, 24.128210067749023, 12.77239990234375, 2.5196533203125, 5.920989990234375, 11.011833190917969, 54.43315887451172, 43.89875793457031, 26.354206085205078, 18.965057373046875, 28.7010498046875, 85.50497436523438, -7.4646759033203125, 45.56944274902344, 55.764183044433594, 49.99135971069336, 25.94854736328125, 4.254386901855469, 59.45310974121094, 65.87178039550781, 7.66455078125, 37.4363899230957, -9.92477035522461, 30.553836822509766, 42.396141052246094, 29.473209381103516, 10.098518371582031, 63.998870849609375, 21.476882934570312, -7.707036972045898, 7.117527008056641, 22.727928161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000323.npy"} +{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 29.687463760375977, "std": 26.02460479736328, "min": -12.558183670043945, "p10": 1.3223396301269537, "median": 24.903850555419922, "p90": 69.10287399291994, "max": 96.87274169921875, "pos_frac": 0.90625, "sample": [18.21978759765625, 25.021568298339844, 11.252330780029297, 31.990066528320312, 12.642816543579102, 16.6724853515625, 18.817893981933594, 8.501724243164062, 29.34575653076172, 23.54814338684082, 96.87274169921875, 2.4467926025390625, 78.3165283203125, 9.817161560058594, 37.47760009765625, 88.95919799804688, -12.01959228515625, 6.12664794921875, 19.083175659179688, 51.50991439819336, 9.343025207519531, 35.820526123046875, 1.8232536315917969, 38.03578567504883, 35.29804992675781, 63.7189826965332, 65.62660217285156, -7.7711639404296875, 15.243568420410156, 64.87338256835938, 70.59270477294922, 20.143966674804688, 25.868515014648438, 13.646514892578125, 45.72985076904297, 62.12467956542969, 35.02813720703125, 40.559112548828125, 14.331199645996094, 13.650808334350586, 22.565818786621094, -4.40496826171875, 5.752006530761719, 19.552047729492188, 51.316162109375, 24.7861328125, -4.342449188232422, -12.558183670043945, 43.36219024658203, 30.12921142578125, 45.287689208984375, 77.15605926513672, 33.908485412597656, 1.1076622009277344, 36.50022888183594, 44.816978454589844, 75.09001159667969, -1.5276927947998047, 4.148723602294922, 85.99288940429688, 28.540050506591797, 21.731765747070312, 4.0907135009765625, 28.705970764160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000324.npy"} +{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 28.70180320739746, "std": 21.971115112304688, "min": -10.567436218261719, "p10": 0.583955764770509, "median": 28.38096809387207, "p90": 59.18320541381838, "max": 77.57283020019531, "pos_frac": 0.90625, "sample": [-3.9786758422851562, -1.27276611328125, 52.70220947265625, -4.192661285400391, 42.80693054199219, 75.89482879638672, 3.034088134765625, -10.567436218261719, 39.02400207519531, 63.64720916748047, 12.146476745605469, 54.45726013183594, 71.6639404296875, 15.489425659179688, 38.94517135620117, 24.39969825744629, 39.72735595703125, 26.500118255615234, 2.5250244140625, 46.9720458984375, 11.328758239746094, 66.43910217285156, 61.20861053466797, 63.13684844970703, 4.650547027587891, 0.07411575317382812, 33.37263870239258, 31.126794815063477, 25.269424438476562, 15.551597595214844, 54.02500534057617, 34.79930114746094, 45.64973449707031, 3.255725860595703, 44.950035095214844, 30.096908569335938, 48.9664306640625, 37.08894729614258, 25.87903594970703, 41.22419357299805, 7.0594024658203125, 1.7735824584960938, 45.039642333984375, 10.075170516967773, 39.217926025390625, 15.221151351928711, 14.613182067871094, 27.799617767333984, -2.0339508056640625, 31.029727935791016, 22.538116455078125, -3.301971435546875, 4.5549163818359375, 77.57283020019531, 20.875350952148438, 23.092185974121094, 36.722023010253906, 26.966096878051758, 40.399169921875, 28.962318420410156, 36.57502746582031, 3.9288330078125, 11.060386657714844, 49.15667724609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000325.npy"} +{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 25.774160385131836, "std": 24.158143997192383, "min": -11.115692138671875, "p10": -0.2894470214843746, "median": 19.549297332763672, "p90": 60.580932617187514, "max": 102.02278137207031, "pos_frac": 0.890625, "sample": [0.105712890625, 28.665470123291016, 5.8630523681640625, 66.91387176513672, 43.24119567871094, -0.45880126953125, -3.3421173095703125, 33.48416519165039, 42.690887451171875, -11.115692138671875, 26.729320526123047, 16.579814910888672, 102.02278137207031, -2.5951766967773438, -4.715642929077148, 13.68109130859375, 9.334163665771484, 15.3782958984375, 16.749969482421875, 57.758277893066406, -6.47125244140625, 0.7042999267578125, 61.79064178466797, 64.50244140625, 13.845474243164062, 8.69854736328125, 83.83413696289062, 53.19489669799805, 1.9328384399414062, 4.188793182373047, 24.184722900390625, 43.994361877441406, 51.43671417236328, 34.4117317199707, 18.072341918945312, -2.8148956298828125, 7.631557464599609, 19.59990692138672, 14.127574920654297, 49.410614013671875, 28.259796142578125, 21.68572998046875, 18.41927719116211, 19.498687744140625, 31.601402282714844, 13.108890533447266, 5.849403381347656, 19.610595703125, 9.276763916015625, 28.682044982910156, 2.5076217651367188, 72.92807006835938, 52.233978271484375, 21.8607177734375, 68.6996841430664, 32.747337341308594, 30.637008666992188, 10.71295166015625, 1.2849292755126953, 22.91015625, 49.884986877441406, 52.04469299316406, 16.656566619873047, 15.1988525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000326.npy"} +{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 37.30348587036133, "std": 25.048036575317383, "min": -9.133087158203125, "p10": 7.901258468627931, "median": 35.78357696533203, "p90": 74.64009704589844, "max": 106.99537658691406, "pos_frac": 0.921875, "sample": [38.42254638671875, -0.5779476165771484, 75.62722778320312, 53.59609603881836, 15.929931640625, 50.17212677001953, -4.233650207519531, 45.58226013183594, 33.884857177734375, 47.85723114013672, 47.66827392578125, 50.51676940917969, 16.073638916015625, 52.83514404296875, 31.3699951171875, 25.27063751220703, 20.001102447509766, 66.04489135742188, 79.52882385253906, 50.86119842529297, -6.491783142089844, 46.17521667480469, 75.82475280761719, 50.996070861816406, 13.807079315185547, 66.67572021484375, 30.639171600341797, 7.693828582763672, 47.416831970214844, 57.351806640625, 59.31456756591797, -2.520660400390625, 33.863616943359375, 34.426513671875, 25.287277221679688, 28.688098907470703, 76.34831237792969, 42.27928161621094, 38.06126403808594, 37.53189468383789, 30.27570343017578, 5.804695129394531, 35.24415588378906, 83.57278442382812, 28.66046142578125, 8.787712097167969, 17.34756088256836, 36.322998046875, 72.3367919921875, -9.133087158203125, 59.892852783203125, 37.36044692993164, 106.99537658691406, 10.326927185058594, 48.23713684082031, 19.368032455444336, 27.334938049316406, 88.68534088134766, 18.820480346679688, 43.9422607421875, 26.287925720214844, 8.385261535644531, 13.67181396484375, 9.0946044921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000327.npy"} +{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 27.931251525878906, "std": 25.192350387573242, "min": -16.01385498046875, "p10": 2.4425888061523438, "median": 20.628297805786133, "p90": 62.71346588134766, "max": 106.76388549804688, "pos_frac": 0.921875, "sample": [37.78790283203125, -0.512176513671875, 6.659431457519531, 20.608631134033203, 17.500137329101562, 27.427490234375, 33.11896896362305, 24.180831909179688, 34.86561584472656, 5.645576477050781, 30.038633346557617, 14.884159088134766, 47.08160400390625, 73.71991729736328, -16.01385498046875, 35.67315673828125, -0.26081085205078125, 7.951484680175781, 5.410112380981445, 31.640625, -8.931346893310547, 6.884124755859375, 41.20105743408203, 62.799163818359375, 50.48406982421875, 62.51350402832031, 10.384017944335938, 5.120849609375, 43.326393127441406, 71.97222137451172, 5.871831893920898, 17.761260986328125, 53.72964096069336, 15.39156723022461, -2.309507369995117, 17.994888305664062, 20.647964477539062, 44.17900848388672, 27.639862060546875, 19.32049560546875, 2.5230445861816406, 51.86835479736328, 2.4081077575683594, 15.192073822021484, 3.6730823516845703, 83.4455337524414, 8.678674697875977, 17.862564086914062, 58.52909851074219, 36.04136657714844, 53.03900146484375, 106.76388549804688, 17.865142822265625, 15.27581787109375, 1.553497314453125, 72.06939697265625, 72.91578674316406, 8.179824829101562, 6.600776672363281, 25.165119171142578, 44.528045654296875, 55.156036376953125, 20.989294052124023, 3.8880767822265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000328.npy"} +{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 37.172855377197266, "std": 26.980588912963867, "min": -21.483566284179688, "p10": 8.542887115478518, "median": 36.54423141479492, "p90": 68.84908599853516, "max": 108.38369750976562, "pos_frac": 0.9375, "sample": [48.895355224609375, 71.8336181640625, 87.64993286132812, 46.426673889160156, 23.928787231445312, 17.58185386657715, 66.77281188964844, 24.10596466064453, 94.4532470703125, 16.314321517944336, 41.295379638671875, 23.918228149414062, 26.944087982177734, 2.4093704223632812, 18.85837173461914, 35.605247497558594, 46.94915771484375, 48.32469940185547, 9.857381820678711, 108.38369750976562, 17.059120178222656, 19.799774169921875, 33.00633239746094, 38.99884796142578, 1.132568359375, 10.090408325195312, 15.588668823242188, 86.44189453125, 52.846099853515625, 50.4141845703125, 49.327239990234375, 14.102672576904297, 65.68758392333984, 13.678068161010742, 37.48321533203125, 69.64303588867188, 58.40264892578125, 56.561187744140625, 61.14710998535156, 46.0290641784668, 16.627857208251953, 11.067188262939453, -17.822601318359375, 57.1536865234375, 58.89410400390625, 20.497501373291016, 58.93666076660156, -15.362106323242188, 20.785802841186523, 20.506038665771484, 7.979532241821289, -1.463705062866211, 34.83012390136719, 49.266845703125, 20.78197479248047, 27.58776092529297, 42.64237976074219, 66.99653625488281, 78.85594177246094, 31.076980590820312, -21.483566284179688, 45.330482482910156, 60.705909729003906, 46.72346496582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000329.npy"} +{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 31.302356719970703, "std": 24.48064422607422, "min": -23.04400634765625, "p10": 7.983120346069337, "median": 23.03891944885254, "p90": 62.92135314941407, "max": 94.6649169921875, "pos_frac": 0.9375, "sample": [94.6649169921875, 16.5576171875, 0.7209320068359375, -12.697820663452148, 32.728271484375, 55.33805847167969, 20.064014434814453, 37.712493896484375, 14.048563003540039, 10.26654052734375, 9.426755905151367, 18.23907470703125, 34.56196594238281, 51.813201904296875, 76.53340148925781, 14.346542358398438, 27.465835571289062, 42.62922668457031, 36.387996673583984, 55.29847717285156, 15.351337432861328, 21.306686401367188, 63.58064270019531, 23.081974029541016, 24.742477416992188, 54.989315032958984, -23.04400634765625, 41.00721740722656, 28.40603256225586, 12.193181991577148, 48.60028076171875, 12.989921569824219, 10.457908630371094, 14.093894958496094, 59.921653747558594, 22.97760772705078, 21.370323181152344, 55.33348083496094, 16.14832305908203, 16.7932071685791, 7.432319641113281, 70.02716064453125, 15.789804458618164, 51.773590087890625, -8.899742126464844, 89.14852905273438, 51.57389831542969, 9.268321990966797, 42.46150207519531, 13.892618179321289, 34.425071716308594, 61.38301086425781, 22.995864868164062, 58.52777099609375, 16.32807159423828, 54.586856842041016, 21.699806213378906, 11.9993896484375, -1.10589599609375, 41.512733459472656, 68.074951171875, 18.292274475097656, 69.60205841064453, 6.153411865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000330.npy"} +{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 34.531578063964844, "std": 23.45041847229004, "min": -5.5077056884765625, "p10": 9.373161315917969, "median": 29.82193374633789, "p90": 67.04142227172854, "max": 96.83233642578125, "pos_frac": 0.953125, "sample": [18.811119079589844, 46.677764892578125, 54.450660705566406, 10.337875366210938, 34.816650390625, 25.13812255859375, 36.772483825683594, -0.8995361328125, 9.109466552734375, 26.574325561523438, 61.83148193359375, 20.322738647460938, 96.83233642578125, 4.3282012939453125, 44.61857223510742, 3.7662887573242188, 91.78093719482422, 8.43375015258789, 35.273895263671875, 86.65234375, 60.75341033935547, 41.34004211425781, 88.24111938476562, 33.812259674072266, 23.602970123291016, -1.47955322265625, 29.43938446044922, 9.988449096679688, 42.513816833496094, 30.204483032226562, 26.825092315673828, 24.233623504638672, 42.342552185058594, 25.620838165283203, 41.23731994628906, 12.312370300292969, 21.616802215576172, 17.466293334960938, 39.838775634765625, 88.02542877197266, 28.135353088378906, 12.977394104003906, 17.912424087524414, 47.629974365234375, 25.566131591796875, 16.32836151123047, 34.77461242675781, 69.27425384521484, 14.0162353515625, 49.8577880859375, -5.5077056884765625, 36.36884689331055, 16.040563583374023, 34.898231506347656, 46.692604064941406, 27.862777709960938, 46.12492370605469, 14.808853149414062, 23.001998901367188, 43.05314636230469, 51.33403015136719, 40.54740905761719, 77.90763854980469, 26.8504638671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000331.npy"} +{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 25.641084671020508, "std": 22.47309684753418, "min": -28.82213592529297, "p10": 0.036081886291505105, "median": 21.479843139648438, "p90": 55.032700347900395, "max": 83.51962280273438, "pos_frac": 0.890625, "sample": [59.248016357421875, 14.961814880371094, 20.230224609375, 38.8377799987793, 17.073272705078125, 5.7343597412109375, 38.14122772216797, 80.82290649414062, 59.04539489746094, 45.58564758300781, 36.50249481201172, 2.0954647064208984, 2.2306060791015625, 83.51962280273438, 13.974161148071289, 12.945722579956055, 30.400516510009766, 40.86192321777344, -1.8877792358398438, -1.9047565460205078, 43.07716369628906, 33.77262878417969, 37.95896911621094, 34.70419692993164, 52.59593200683594, 32.032833099365234, -7.917510986328125, 1.4217605590820312, 33.392059326171875, 54.81462860107422, 31.27862548828125, 3.483959197998047, 12.4603271484375, 1.2454833984375, -3.2428436279296875, 17.696388244628906, 63.669464111328125, 63.9456787109375, 9.553899765014648, 37.144317626953125, 37.15229797363281, 19.66977310180664, 45.392547607421875, 45.206886291503906, 9.390735626220703, 55.12615966796875, 21.67279052734375, 22.071006774902344, 16.92633056640625, 46.69844055175781, 20.82320785522461, 19.903724670410156, 42.863807678222656, 21.58782958984375, 37.016597747802734, -0.48223304748535156, -9.296829223632812, 2.4715518951416016, 7.220514297485352, 21.371856689453125, 17.512527465820312, 3.5220718383789062, 14.523326873779297, -28.82213592529297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000332.npy"} +{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 32.79645919799805, "std": 29.007871627807617, "min": -22.53701400756836, "p10": -0.09001903533935518, "median": 32.828224182128906, "p90": 68.4666732788086, "max": 106.86128234863281, "pos_frac": 0.890625, "sample": [-0.7310028076171875, 16.20703887939453, 70.47061157226562, -1.110107421875, 11.082557678222656, 24.700836181640625, 40.7677001953125, 51.451873779296875, 3.4059219360351562, -0.2136993408203125, 68.70657348632812, 10.117643356323242, 70.45869445800781, -6.392082214355469, 16.290634155273438, 7.8360595703125, 56.39368438720703, 16.234769821166992, 78.64877319335938, 20.74700164794922, -22.53701400756836, 33.393943786621094, 2.7699432373046875, 60.87092590332031, 65.98231506347656, 38.315765380859375, 32.89875030517578, 16.742279052734375, 47.328887939453125, 74.60393524169922, 7.988929748535156, 5.500711441040039, 0.9219207763671875, 66.66165161132812, 49.920806884765625, 48.1329345703125, 2.635448455810547, 0.19856834411621094, 58.753807067871094, 55.915733337402344, 16.823707580566406, 64.50985717773438, 6.459098815917969, 41.186256408691406, 67.90690612792969, 9.405521392822266, 35.418556213378906, 32.75769805908203, 106.86128234863281, 55.11131286621094, 58.675254821777344, 51.83306121826172, 99.28328704833984, 18.8280029296875, 25.02862548828125, -11.704559326171875, 55.16729736328125, 18.35767364501953, 61.17152404785156, 47.31163024902344, 10.085357666015625, -17.072540283203125, 36.954254150390625, 6.5405731201171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000333.npy"} +{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 31.349750518798828, "std": 22.043792724609375, "min": -15.223129272460938, "p10": 5.0769683837890645, "median": 31.351470947265625, "p90": 54.745633697509774, "max": 90.50738525390625, "pos_frac": 0.9375, "sample": [81.58090209960938, 16.90283203125, -2.06475830078125, 53.27312469482422, 42.19007873535156, 11.272506713867188, 17.73877716064453, 10.635986328125, 46.34074020385742, 13.448928833007812, 10.120758056640625, 28.857990264892578, 34.02361297607422, 45.391876220703125, 25.13109588623047, 41.75868225097656, 19.850584030151367, 50.627349853515625, -15.223129272460938, 38.017608642578125, 8.468395233154297, 38.902095794677734, 21.484275817871094, -10.781290054321289, 57.91947937011719, 34.411529541015625, 47.77238464355469, 6.8357696533203125, 13.634525299072266, 4.3231964111328125, 2.98895263671875, 11.810260772705078, 20.699539184570312, 33.82594299316406, 55.376708984375, 50.821807861328125, 35.93749237060547, 18.796236038208008, 32.77867889404297, 39.18365478515625, 42.64794158935547, 12.862030029296875, 35.12060546875, 24.52099609375, 0.8150405883789062, 21.817943572998047, 79.20281982421875, 45.924713134765625, 85.04135131835938, 46.77857208251953, 40.46296691894531, 29.92426300048828, 24.78852081298828, 25.91911506652832, 26.597023010253906, 52.89092254638672, 44.840911865234375, 19.606403350830078, 20.004180908203125, 39.719032287597656, -3.2519893646240234, 90.50738525390625, 46.63603973388672, 57.94218444824219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000334.npy"} +{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 27.685340881347656, "std": 26.116199493408203, "min": -15.110403060913086, "p10": 0.09156951904296923, "median": 21.923943519592285, "p90": 55.192546844482436, "max": 128.14276123046875, "pos_frac": 0.890625, "sample": [43.94622802734375, 45.88527297973633, 18.1956787109375, 6.916755676269531, 48.65943145751953, 8.908498764038086, 18.05634880065918, 56.380455017089844, 44.501686096191406, -4.669267654418945, 2.2607059478759766, 33.881683349609375, 16.433937072753906, 35.757240295410156, 51.78135681152344, 85.05659484863281, 7.7684478759765625, 33.59577178955078, -11.69363021850586, 25.80933380126953, 19.927474975585938, -1.5896930694580078, 52.42076110839844, 45.841796875, 47.98186492919922, 49.074737548828125, 37.28512191772461, 33.93632507324219, 1.3817062377929688, 50.173309326171875, 128.14276123046875, -5.690010070800781, 44.26741409301758, 6.820075988769531, 57.10078811645508, 21.52836799621582, 77.10516357421875, 38.2518310546875, 14.536605834960938, 36.64434814453125, -0.1066741943359375, 75.24969482421875, 2.3141021728515625, 0.55413818359375, 8.105854034423828, 9.400039672851562, 40.36455154418945, 14.523422241210938, -15.110403060913086, 20.147003173828125, 19.82094383239746, 0.6287612915039062, 59.81715393066406, 22.31951904296875, 7.385496139526367, 22.958202362060547, 42.1748046875, 51.62549591064453, 2.28582763671875, -7.038127899169922, 16.7940731048584, 34.95953369140625, 11.997135162353516, 8.117862701416016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000335.npy"} +{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 34.13089370727539, "std": 29.321842193603516, "min": -44.246299743652344, "p10": 3.0404552459716805, "median": 30.8962459564209, "p90": 72.7255630493164, "max": 129.95404052734375, "pos_frac": 0.90625, "sample": [15.839218139648438, 18.743408203125, 85.86990356445312, 49.19447326660156, 17.95134162902832, 50.403968811035156, 17.24530792236328, 32.14739227294922, -7.039157867431641, 107.1201171875, -6.736289978027344, 12.643310546875, 22.381752014160156, 30.77503204345703, 129.95404052734375, -2.694009780883789, 82.26664733886719, 21.80766487121582, 22.66826629638672, 41.290985107421875, -14.240169525146484, 36.23841857910156, 65.10798645019531, 22.38506317138672, 2.7483367919921875, 35.02490997314453, 30.976367950439453, 39.67531967163086, 45.86151123046875, 26.939315795898438, 23.8266544342041, 15.774200439453125, 30.816123962402344, 14.997276306152344, 27.73217010498047, 41.80708312988281, -10.507049560546875, 33.300811767578125, 14.40316390991211, 44.94319152832031, 28.290374755859375, 57.97577667236328, 55.59039306640625, 34.08428955078125, 73.36810302734375, 10.468772888183594, -44.246299743652344, 25.809036254882812, 34.90711212158203, 40.654693603515625, 22.643985748291016, 36.5944938659668, 71.22630310058594, 28.720678329467773, 14.779571533203125, 54.04386901855469, 49.970603942871094, 97.98678588867188, 3.722064971923828, 37.55096435546875, 4.797996520996094, 54.79191589355469, 76.13758850097656, 44.86407470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000336.npy"} +{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 30.773639678955078, "std": 22.813125610351562, "min": -14.600936889648438, "p10": 4.215460968017579, "median": 30.99468231201172, "p90": 60.56119918823243, "max": 85.46808624267578, "pos_frac": 0.953125, "sample": [75.51119995117188, 80.53032684326172, -14.600936889648438, -2.851226806640625, 53.60649108886719, 33.10028076171875, 0.6034774780273438, 14.832687377929688, 6.430469512939453, 31.020751953125, 5.882568359375, 32.0220947265625, 32.184242248535156, 47.59339904785156, 61.62206268310547, 15.378005981445312, 54.441734313964844, 44.322208404541016, 5.384742736816406, 41.971580505371094, 9.51275634765625, 25.597530364990234, 21.126306533813477, 53.431793212890625, 27.55752182006836, 22.069580078125, 33.60481262207031, 10.742721557617188, 10.775276184082031, 19.162321090698242, 10.36349868774414, 29.685096740722656, 51.246116638183594, 1.0967864990234375, 18.50179672241211, 24.227203369140625, 30.968612670898438, -7.175376892089844, 32.234474182128906, 35.91551971435547, 14.782428741455078, 85.46808624267578, 6.25384521484375, 7.188743591308594, 9.197257995605469, 17.158252716064453, 44.67463684082031, 38.29203796386719, 54.74458694458008, 37.28843688964844, 46.52273941040039, 32.594261169433594, 56.43731689453125, 67.46510314941406, 22.506057739257812, 79.36520385742188, 41.10585021972656, 39.47120666503906, 3.7143402099609375, 59.445953369140625, 39.7034912109375, 24.187484741210938, 1.2759437561035156, 61.039161682128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000337.npy"} +{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 30.744945526123047, "std": 24.816186904907227, "min": -4.947551727294922, "p10": 3.4065391540527354, "median": 27.370797157287598, "p90": 72.5469841003418, "max": 80.85774230957031, "pos_frac": 0.9375, "sample": [56.12481689453125, 73.89776611328125, 3.0072479248046875, 4.441009521484375, 35.602203369140625, 5.8609161376953125, 7.124265670776367, 39.82948303222656, 73.66397857666016, 27.528472900390625, 4.338218688964844, 11.508171081542969, 13.339315414428711, 72.15985107421875, -3.56280517578125, 9.096786499023438, 35.94060516357422, 30.053878784179688, 43.049007415771484, 67.60783386230469, 41.525146484375, 1.4753189086914062, 34.98577880859375, 30.577499389648438, 31.212020874023438, 28.916702270507812, 34.674652099609375, -4.947551727294922, 11.490997314453125, 22.324485778808594, 23.16659164428711, 69.39921569824219, 72.71289825439453, -4.232341766357422, 15.43792724609375, 20.658592224121094, 6.603675842285156, 17.101470947265625, 51.988502502441406, 20.94997215270996, 40.7359619140625, 17.158788681030273, 7.695442199707031, -1.3013839721679688, 79.1287841796875, 8.443277359008789, 30.71331024169922, 11.719963073730469, 11.470779418945312, 27.21312141418457, 10.32305908203125, 4.811456680297852, 52.45314025878906, 80.85774230957031, 41.32377624511719, 58.75956726074219, 75.01754760742188, 26.058826446533203, 57.16792297363281, 44.59137725830078, 80.3785400390625, 14.022880554199219, 2.969259262084961, 49.33085632324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000338.npy"} +{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 34.47539520263672, "std": 26.293350219726562, "min": -12.351924896240234, "p10": 1.260022735595705, "median": 32.47867965698242, "p90": 68.70642700195313, "max": 107.76193237304688, "pos_frac": 0.921875, "sample": [41.51512908935547, 39.62919998168945, 20.672733306884766, -9.148927688598633, 71.48957824707031, 16.402267456054688, 42.73579406738281, 40.85849380493164, 71.63223266601562, 6.6774749755859375, 107.76193237304688, 13.379188537597656, 55.78302001953125, 16.238067626953125, 44.89314270019531, 65.81854248046875, 5.632871627807617, 11.825531005859375, 16.926536560058594, 54.034759521484375, 40.94068908691406, 11.314964294433594, 43.25151824951172, -1.2343215942382812, 32.97972869873047, 23.712291717529297, 54.920013427734375, 17.25043487548828, 78.12083435058594, 104.08171081542969, -0.9076747894287109, 50.25876235961914, 30.847023010253906, 0.5307388305664062, 103.36666107177734, 35.652801513671875, 31.288116455078125, 10.019994735717773, 27.081817626953125, 20.331649780273438, 15.788238525390625, 22.740638732910156, 31.977630615234375, 43.382568359375, 19.909072875976562, 52.267356872558594, 2.9616851806640625, 63.607421875, 15.125263214111328, -12.351924896240234, 43.494564056396484, 69.944091796875, 40.07721710205078, 38.303199768066406, 31.28152847290039, 39.15138244628906, 0.1298980712890625, -5.482719421386719, 38.79767608642578, 28.45184326171875, 52.38932800292969, 31.440948486328125, 37.66047668457031, 56.81261444091797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000339.npy"} +{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 33.67200469970703, "std": 30.476184844970703, "min": -7.860015869140625, "p10": 2.7437366485595702, "median": 30.224653244018555, "p90": 67.39425659179688, "max": 150.76434326171875, "pos_frac": 0.921875, "sample": [39.12395477294922, 69.21541595458984, 49.49346160888672, 26.57233428955078, 37.14707946777344, 27.593822479248047, 79.83393859863281, 2.6939239501953125, 40.88592529296875, 8.089715957641602, 55.603302001953125, 150.76434326171875, 47.2928466796875, 1.0789241790771484, 67.16664123535156, 22.138519287109375, 66.15032196044922, 36.357906341552734, 32.18274688720703, 10.363445281982422, 32.78070831298828, 28.266559600830078, -1.2809982299804688, 94.69819641113281, 3.456390380859375, 39.66534423828125, -7.088289260864258, 33.700340270996094, 6.281349182128906, 43.495262145996094, 57.890716552734375, 40.48200988769531, 136.77151489257812, 32.81349182128906, 3.5907554626464844, 16.90968132019043, 18.996212005615234, 36.207191467285156, 7.096733093261719, 25.015634536743164, 44.302818298339844, 20.136207580566406, 7.36895751953125, 25.456554412841797, -3.6977081298828125, 74.2906723022461, 10.832672119140625, 9.175743103027344, 65.8824234008789, 42.69215393066406, 10.625389099121094, 67.49180603027344, 51.15643310546875, 14.186691284179688, 24.00798797607422, 26.7125244140625, 19.050941467285156, 2.859966278076172, 42.470794677734375, 41.565093994140625, -1.6803741455078125, 40.30720901489258, -7.860015869140625, 8.175987243652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000340.npy"} +{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 37.13330078125, "std": 26.232938766479492, "min": -10.299606323242188, "p10": 5.408286666870119, "median": 36.11475944519043, "p90": 71.16452026367188, "max": 92.7139892578125, "pos_frac": 0.9375, "sample": [92.7139892578125, -4.307342529296875, 9.139915466308594, 7.514341354370117, 20.001426696777344, 50.38719177246094, 40.146026611328125, 69.79020690917969, 41.41722869873047, 27.219562530517578, 25.576425552368164, 43.72526550292969, 64.91675567626953, 45.118812561035156, 25.052541732788086, 11.576980590820312, 14.539596557617188, 86.70924377441406, 41.78142547607422, 41.76458740234375, 6.482978820800781, 78.0778579711914, 65.56362915039062, 54.628265380859375, 22.96532440185547, 71.4757080078125, 42.10527801513672, 43.93983459472656, 32.05684280395508, -10.299606323242188, 7.1004180908203125, 70.43841552734375, 66.6041259765625, -1.69696044921875, 12.402393341064453, 51.51133728027344, 50.97016906738281, -0.5673027038574219, 3.4270801544189453, 18.890365600585938, 82.14773559570312, 39.70152282714844, 32.52799606323242, 11.46600341796875, 4.947704315185547, 28.290985107421875, 16.772903442382812, 69.69342803955078, 2.1358566284179688, 64.71253967285156, 17.7043514251709, 43.811424255371094, 31.452224731445312, 14.443389892578125, 59.93727111816406, 26.511756896972656, 87.69770812988281, 40.255889892578125, 61.32264709472656, 19.61929702758789, 55.626068115234375, 28.94428062438965, 82.28507232666016, 13.662803649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000341.npy"} +{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 32.50803756713867, "std": 29.156147003173828, "min": -8.837249755859375, "p10": -1.4624504089355463, "median": 30.59192657470703, "p90": 76.82030334472657, "max": 108.76419067382812, "pos_frac": 0.875, "sample": [74.43582153320312, 0.01617431640625, -4.164825439453125, 30.84954833984375, 29.502368927001953, 28.504905700683594, 61.96398162841797, 21.467636108398438, 11.57269287109375, 15.903678894042969, 10.037956237792969, 51.39080810546875, 43.75665283203125, 20.085044860839844, 106.10284423828125, 19.875755310058594, -0.8472213745117188, 34.98065185546875, 31.865371704101562, -1.78619384765625, 30.334304809570312, 28.494735717773438, 40.36289978027344, 4.255805969238281, -1.7261199951171875, 33.203521728515625, 96.74217987060547, 23.872344970703125, 19.64168357849121, 6.7394256591796875, 1.4596748352050781, 108.76419067382812, 43.729393005371094, 3.809558868408203, 0.5386962890625, -5.164131164550781, 79.62621307373047, 107.93912506103516, 66.89659118652344, 26.34156036376953, -3.8513545989990234, 38.93944549560547, 37.55046081542969, -8.837249755859375, 55.15924072265625, 77.84222412109375, 39.4415283203125, 43.123802185058594, 25.658395767211914, 48.42181396484375, 80.5037841796875, 16.518699645996094, 14.160629272460938, 31.74755859375, 34.79331588745117, 6.3022003173828125, 32.43110275268555, 32.49769592285156, 0.45829010009765625, 38.083465576171875, 45.63619613647461, 55.33943176269531, 43.750938415527344, -6.532524108886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000342.npy"} +{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 34.53588104248047, "std": 24.96697235107422, "min": -20.331771850585938, "p10": 4.863111114501954, "median": 35.161678314208984, "p90": 67.39685516357422, "max": 91.47411346435547, "pos_frac": 0.90625, "sample": [49.538177490234375, 13.336814880371094, 67.29560852050781, 22.69135284423828, 28.37757682800293, -1.833038330078125, 12.880146026611328, 53.446372985839844, 5.0273284912109375, 56.39418029785156, 5.7549896240234375, 46.86248779296875, 23.370281219482422, 26.51104736328125, 36.29548645019531, 14.703136444091797, 27.56647491455078, 44.28032684326172, 43.59058380126953, 70.10507202148438, 62.310550689697266, 80.73956298828125, 14.039703369140625, 62.56251525878906, 73.34791564941406, 34.37316131591797, 19.153776168823242, 9.733078002929688, 56.5767822265625, 45.218505859375, -20.331771850585938, 18.521717071533203, 72.00562286376953, -11.706741333007812, 28.26457977294922, 14.452972412109375, 50.16798400878906, 20.901742935180664, 6.109174728393555, 43.34632873535156, -3.692138671875, 73.12802124023438, 26.13066864013672, 22.089126586914062, 19.93238067626953, 67.44024658203125, 63.32826232910156, 43.12989044189453, 35.9501953125, 55.09223175048828, 20.783843994140625, 64.94073486328125, 45.42816925048828, 11.427314758300781, 37.27445983886719, 38.17057800292969, 36.75291442871094, 43.40203857421875, 29.040306091308594, 4.792732238769531, -2.1014938354492188, 66.0440673828125, 91.47411346435547, -5.6437530517578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000343.npy"} +{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 28.359664916992188, "std": 29.36441993713379, "min": -16.37274932861328, "p10": -1.169548034667968, "median": 23.09178924560547, "p90": 65.51976852416993, "max": 135.75808715820312, "pos_frac": 0.875, "sample": [7.255290985107422, 16.959875106811523, -16.37274932861328, -6.299335479736328, 17.642223358154297, 44.998451232910156, 19.28021812438965, 22.155197143554688, 0.6088409423828125, 17.392593383789062, 7.13115119934082, 47.904754638671875, -7.9870452880859375, 48.34611511230469, 8.823760986328125, 21.49378204345703, 42.36735534667969, 16.292694091796875, -0.454925537109375, 38.29228210449219, 0.42201995849609375, 13.641304016113281, 5.018640518188477, 26.17122459411621, 24.02838134765625, 28.503265380859375, 38.69801330566406, 63.02020263671875, 8.283285140991211, 6.627471923828125, 85.33880615234375, 26.481597900390625, 40.88062286376953, 3.9508304595947266, 20.838531494140625, 36.743896484375, 46.6226806640625, -12.07470703125, 57.61077880859375, 26.40448570251465, 95.89376831054688, 39.12602233886719, 28.119260787963867, 28.427146911621094, 1.9029426574707031, 135.75808715820312, 60.21517562866211, 44.12649154663086, 15.56273078918457, 79.11824035644531, 2.244314193725586, 1.972015380859375, 29.270397186279297, 86.6616439819336, 16.447418212890625, 29.357818603515625, -1.645944595336914, 34.704437255859375, 88.9355697631836, -15.784423828125, 40.446075439453125, 16.002363204956055, 66.59101104736328, -1.4758148193359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000344.npy"} +{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 34.33030319213867, "std": 26.923765182495117, "min": -30.57220458984375, "p10": 3.056636047363282, "median": 33.63469314575195, "p90": 67.07272720336915, "max": 131.99282836914062, "pos_frac": 0.9375, "sample": [3.7365875244140625, 131.99282836914062, 48.220829010009766, 31.12701416015625, 19.07537269592285, -5.286521911621094, 59.109375, 19.843223571777344, 35.22361373901367, 30.643753051757812, 1.9070606231689453, 41.44532775878906, 39.633453369140625, 70.71664428710938, 42.207496643066406, 38.022979736328125, -4.0167236328125, 40.66021728515625, 6.219501495361328, 30.595947265625, 2.765228271484375, 47.62736511230469, 18.078369140625, 22.13631820678711, 53.35839080810547, 39.97654724121094, 19.160465240478516, 64.92744445800781, 38.081459045410156, 34.14396667480469, 19.871856689453125, 67.99213409423828, 23.261688232421875, 52.10227966308594, 14.161834716796875, 45.84959411621094, 33.96803283691406, 71.4638671875, 22.030731201171875, 38.6005744934082, 21.92603302001953, 116.071533203125, 44.983951568603516, 11.544090270996094, 44.670440673828125, 42.76203918457031, 13.786331176757812, 69.34087371826172, 20.6221923828125, 27.524417877197266, 46.45252990722656, 2.1770095825195312, 22.712310791015625, 47.30335998535156, -7.500698089599609, 19.83765411376953, 6.747526168823242, 28.252334594726562, 50.14344787597656, 21.01184844970703, 48.05663299560547, 85.34835815429688, -30.57220458984375, 33.301353454589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000345.npy"} +{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 30.47341537475586, "std": 26.77487564086914, "min": -21.124984741210938, "p10": -2.7227661132812453, "median": 23.638474464416504, "p90": 61.30450210571291, "max": 106.96741485595703, "pos_frac": 0.890625, "sample": [56.39148712158203, 7.4067840576171875, -9.817375183105469, 63.41007995605469, 91.983154296875, 12.491771697998047, 90.10153198242188, 23.802881240844727, 43.72449493408203, 106.96741485595703, -21.124984741210938, 16.680824279785156, 49.73808288574219, -14.780830383300781, 21.397682189941406, 36.22161865234375, 11.542167663574219, 51.49385070800781, 71.76568603515625, 9.776206970214844, 48.781707763671875, 13.6453857421875, -9.687286376953125, 53.422027587890625, 21.920639038085938, 41.0194206237793, 44.235374450683594, 15.002113342285156, 36.88550567626953, 18.2159423828125, 17.386995315551758, 10.282699584960938, 23.47406768798828, 21.509492874145508, 34.85918426513672, 44.62656784057617, 55.05525207519531, 12.924381256103516, 48.7358512878418, 14.104385375976562, 1.9009552001953125, 41.613006591796875, 36.43684387207031, 67.74055480957031, 22.21333122253418, 10.993751525878906, 52.275535583496094, 50.55234909057617, 10.738662719726562, 19.56032943725586, 38.34315490722656, 24.13113021850586, 54.60009765625, -7.49528694152832, 11.009706497192383, 45.30677795410156, 13.312929153442383, 17.15521812438965, 85.97672271728516, -4.7043609619140625, -4.8782196044921875, 3.6984100341796875, 28.712190628051758, 45.53257751464844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000346.npy"} +{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 31.062728881835938, "std": 27.517837524414062, "min": -29.5986328125, "p10": 1.6593750000000012, "median": 26.508434295654297, "p90": 66.43281860351563, "max": 130.65731811523438, "pos_frac": 0.921875, "sample": [1.1412506103515625, 42.19472122192383, 23.57647705078125, 59.19548416137695, 15.172172546386719, 34.249534606933594, 19.859115600585938, 29.452987670898438, 13.800323486328125, 30.866622924804688, 37.738555908203125, 12.719066619873047, 25.959365844726562, 13.50421142578125, 46.10436248779297, -29.5986328125, -11.954170227050781, 17.823272705078125, 70.04325866699219, 17.715938568115234, 34.32171630859375, -3.9884986877441406, 26.397842407226562, 63.34364318847656, 12.558685302734375, 40.61138153076172, 66.56866455078125, 57.17389678955078, 12.359447479248047, 26.121734619140625, 33.187156677246094, 21.222885131835938, 64.41776275634766, 48.48712921142578, 12.38827896118164, 11.084312438964844, 65.01947021484375, 42.42588806152344, 8.14453125, -18.06717300415039, 35.49639129638672, 74.0830078125, 2.8683319091796875, 56.76652908325195, 20.8043212890625, 29.34500503540039, 19.184677124023438, 39.477516174316406, 87.45736694335938, 12.332387924194336, 38.33140563964844, 66.1158447265625, 43.298099517822266, 130.65731811523438, 7.098791122436523, 33.828857421875, 16.839134216308594, -16.166046142578125, 69.55284881591797, 13.011455535888672, 70.09954071044922, 26.61902618408203, 17.052040100097656, 0.5180816650390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000347.npy"} +{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 30.16468048095703, "std": 26.948823928833008, "min": -18.96076011657715, "p10": -1.7261962890624982, "median": 31.315552711486816, "p90": 62.333581924438484, "max": 93.81663513183594, "pos_frac": 0.875, "sample": [60.96387481689453, 0.17043304443359375, 39.08612823486328, 54.234832763671875, 58.95293426513672, 14.849433898925781, -0.1087493896484375, 18.116357803344727, 62.82463073730469, -11.6505126953125, 34.2413215637207, 16.832109451293945, 52.121856689453125, 33.43287658691406, 31.790756225585938, 31.960201263427734, 2.1209659576416016, 38.73677062988281, 40.409725189208984, 2.4333343505859375, 44.14990234375, 64.13419342041016, 2.885040283203125, 89.29953002929688, 52.35679626464844, 57.20153045654297, -3.3163833618164062, 72.0473861694336, 5.996583938598633, 15.175460815429688, 52.945037841796875, 16.41042709350586, -4.115570068359375, 93.81663513183594, 88.04782104492188, 21.214385986328125, 35.7632942199707, 1.5493850708007812, 11.91556167602539, 30.840349197387695, -3.9762744903564453, 29.758834838867188, 5.969474792480469, 55.41260528564453, 58.373748779296875, 15.26424789428711, 34.968894958496094, 8.045465469360352, 53.64714813232422, 14.036056518554688, 1.1550064086914062, 38.960365295410156, 57.238983154296875, 29.711318969726562, 61.187801361083984, 38.494720458984375, -7.1499481201171875, 2.204540252685547, 9.093780517578125, 65.96871948242188, 46.1805419921875, 7.536865234375, -2.4193878173828125, -18.96076011657715], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000348.npy"} +{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 25.91054916381836, "std": 29.782981872558594, "min": -27.220382690429688, "p10": -3.0449119567871095, "median": 23.10346031188965, "p90": 62.8754852294922, "max": 122.14718627929688, "pos_frac": 0.8125, "sample": [-2.939239501953125, -5.7603607177734375, 20.341949462890625, 29.867887496948242, 11.017967224121094, 16.004180908203125, 51.34400177001953, 2.631624221801758, 23.521312713623047, 43.2725830078125, 27.253093719482422, 35.13758087158203, 19.0849666595459, 20.904876708984375, 15.093160629272461, 4.484392166137695, 14.390039443969727, 7.5252838134765625, 31.127403259277344, 22.68999481201172, 64.3270263671875, 34.45396423339844, -0.9271087646484375, 8.452674865722656, 39.56217956542969, -0.9955711364746094, 44.27919006347656, -3.016387939453125, 28.918901443481445, 33.06177520751953, 122.14718627929688, -3.0571365356445312, -17.762470245361328, 1.1959075927734375, 49.60662078857422, -15.761817932128906, -23.31206512451172, 25.18701934814453, 92.56768798828125, 23.379863739013672, 69.75204467773438, -27.220382690429688, -4.697086334228516, 28.55957794189453, 57.582618713378906, 67.3799057006836, 32.84822082519531, 12.794937133789062, 106.23896789550781, 8.4185791015625, 2.465545654296875, 59.488555908203125, 0.6581268310546875, 23.054174423217773, 16.642822265625, 45.502891540527344, 5.548614501953125, 31.521453857421875, 24.636852264404297, 53.741363525390625, 33.39915466308594, 23.152746200561523, -0.276947021484375, 97.78228759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000349.npy"} +{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 27.558395385742188, "std": 28.14163589477539, "min": -22.883224487304688, "p10": -2.161911010742185, "median": 20.798490524291992, "p90": 64.89746856689456, "max": 106.83795166015625, "pos_frac": 0.890625, "sample": [106.83795166015625, -22.577682495117188, 12.447124481201172, 71.49275970458984, 40.84589385986328, 36.215972900390625, 51.94068145751953, 0.347930908203125, -18.495864868164062, 19.54207992553711, 20.182586669921875, 57.144020080566406, -5.7574462890625, 29.141054153442383, 35.70021057128906, 5.31536865234375, 92.15261840820312, 37.419677734375, 48.99639892578125, 50.153472900390625, 68.22037506103516, 19.28280258178711, 11.749979019165039, 8.385398864746094, -3.1619873046875, 53.48692321777344, 10.947776794433594, 0.171600341796875, 43.336524963378906, 45.361412048339844, 40.74045181274414, 4.616634368896484, 30.81183624267578, 20.124366760253906, 0.5397109985351562, 15.524383544921875, 12.457893371582031, 38.07356262207031, -6.964424133300781, 22.7205810546875, 51.506683349609375, 81.0567626953125, 2.1137924194335938, 1.951629638671875, 5.9346923828125, 49.29255294799805, 27.631996154785156, 37.85270690917969, 32.92449951171875, 32.282928466796875, -7.529216766357422, 97.81599426269531, -22.883224487304688, 41.50614929199219, 21.41439437866211, 4.379539489746094, 11.187744140625, 15.599853515625, 5.69610595703125, 39.550453186035156, 14.399467468261719, 15.890602111816406, 19.658763885498047, 79.03175354003906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000350.npy"} +{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 34.37738037109375, "std": 24.538415908813477, "min": -12.101791381835938, "p10": 6.778041648864749, "median": 33.51721954345703, "p90": 65.31790008544922, "max": 97.97732543945312, "pos_frac": 0.90625, "sample": [28.824079513549805, 43.83856964111328, 63.17999267578125, 61.340087890625, 54.20207977294922, 29.384403228759766, -3.9073753356933594, 28.476829528808594, 62.75160217285156, 55.20367431640625, 17.1981201171875, 44.54259490966797, 73.13967895507812, 69.47627258300781, 57.77592468261719, 44.938255310058594, 40.3175048828125, 66.23414611816406, 48.715667724609375, 41.453948974609375, -5.096168518066406, 23.2576904296875, 50.39729309082031, 50.1893310546875, 24.82303237915039, 22.09006118774414, 13.933555603027344, 40.0792350769043, -12.101791381835938, -0.27850341796875, 15.788818359375, 18.913856506347656, -7.735836029052734, 38.265953063964844, 9.953125, 97.97732543945312, 17.079429626464844, 22.387451171875, 40.6868896484375, 22.87200927734375, 10.000839233398438, 34.713287353515625, 12.78884506225586, 91.57416534423828, 17.372772216796875, 5.579751968383789, 49.959938049316406, 9.574050903320312, 16.510231018066406, -10.298385620117188, 18.102479934692383, 48.22796630859375, 27.119461059570312, 18.404159545898438, 85.15582275390625, 33.02268981933594, 38.49364471435547, 77.79953002929688, 12.628196716308594, 36.54310607910156, 47.17937469482422, 47.543479919433594, 34.011749267578125, 27.5762939453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000351.npy"} +{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 26.859298706054688, "std": 27.565814971923828, "min": -23.005859375, "p10": -3.8504777908325196, "median": 24.386627197265625, "p90": 62.98154411315919, "max": 132.36622619628906, "pos_frac": 0.828125, "sample": [46.24268341064453, 60.77479934692383, 33.14781951904297, 19.953407287597656, -5.93231201171875, 9.638385772705078, 46.32238006591797, 50.96931457519531, 29.60430908203125, 24.15973663330078, 82.87530517578125, 77.28144836425781, 25.434894561767578, 26.611839294433594, 58.4476432800293, -3.8459529876708984, 6.33598518371582, 83.95123291015625, 11.091032028198242, 29.417329788208008, 24.61351776123047, 132.36622619628906, 25.733322143554688, 63.92729187011719, 14.374496459960938, 21.774505615234375, -3.0359153747558594, -5.8409423828125, 28.445072174072266, 11.764495849609375, 51.21575927734375, 65.16722869873047, 13.286882400512695, 8.207296371459961, 64.51585388183594, 27.862564086914062, -1.499908447265625, 10.719066619873047, 14.43768310546875, 27.41747283935547, 42.78485107421875, 4.39019775390625, 48.20164108276367, -23.005859375, -7.6150054931640625, -3.909637451171875, -3.8524169921875, -2.8913650512695312, 47.9739990234375, 1.0035781860351562, 26.59003257751465, 2.282745361328125, 7.200099945068359, 21.119949340820312, 25.679569244384766, 27.26470184326172, 59.06761169433594, -4.627296447753906, 52.50823974609375, 25.864545822143555, 21.003997802734375, 11.763824462890625, 18.516246795654297, 13.74752426147461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000352.npy"} +{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 26.92256736755371, "std": 27.726051330566406, "min": -23.348133087158203, "p10": -4.986428070068359, "median": 24.842851638793945, "p90": 66.68102798461915, "max": 86.840087890625, "pos_frac": 0.84375, "sample": [26.147964477539062, 28.061534881591797, 23.17542266845703, 77.00381469726562, 13.238105773925781, 28.613056182861328, 11.433622360229492, 2.990039825439453, 71.30242919921875, 14.657108306884766, -0.54986572265625, -8.950996398925781, 4.688545227050781, 68.23360443115234, 50.44834899902344, 52.43975067138672, 6.795871734619141, 3.3187408447265625, 86.840087890625, 32.476600646972656, 20.82384490966797, 38.73386001586914, 24.506771087646484, 9.775140762329102, -5.508182525634766, 55.61528015136719, -19.38294219970703, 37.98235321044922, 4.240287780761719, 39.560028076171875, 0.5576267242431641, 4.890586853027344, 63.058349609375, 7.4422454833984375, -3.326751708984375, -23.348133087158203, 54.78294372558594, 4.00372314453125, 48.133148193359375, 1.3969593048095703, 40.9068603515625, 79.16766357421875, 17.654075622558594, 25.178932189941406, 25.584224700927734, 46.822227478027344, 46.66516876220703, 39.76448059082031, -22.748748779296875, 77.02569580078125, 23.91820526123047, -5.078582763671875, 49.918434143066406, 61.23392868041992, 52.43962097167969, 85.44845581054688, 9.363319396972656, 10.411331176757812, 33.942474365234375, -12.68031120300293, 10.165210723876953, 41.39171600341797, -4.771400451660156, 35.020423889160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000353.npy"} +{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 31.843639373779297, "std": 21.73491859436035, "min": -7.249900817871094, "p10": 4.536321258544922, "median": 32.944753646850586, "p90": 53.794873046875004, "max": 114.24858856201172, "pos_frac": 0.96875, "sample": [53.550926208496094, 53.89942169189453, 55.585723876953125, 17.13672637939453, 68.40380859375, 18.208587646484375, 48.81755065917969, -0.4352684020996094, 41.03087615966797, 1.3120269775390625, 10.548751831054688, 3.471609115600586, 17.30394744873047, 69.71957397460938, 35.856361389160156, 34.363258361816406, 13.384628295898438, 51.547576904296875, 44.46818542480469, 37.92429733276367, 22.984451293945312, 36.248558044433594, 19.585670471191406, 26.696136474609375, 2.7861099243164062, 40.5184211730957, 49.05297088623047, 34.730140686035156, 34.938880920410156, 22.50539779663086, 34.953704833984375, 8.621816635131836, 49.76865768432617, 47.71726989746094, 35.58186721801758, 27.71484375, -7.249900817871094, 35.03961944580078, 29.252357482910156, 0.58251953125, 23.915695190429688, 19.418235778808594, 43.0025634765625, 5.4423980712890625, 30.32811737060547, 41.68910217285156, 23.482391357421875, 114.24858856201172, 42.36772155761719, 10.958984375, 12.684341430664062, 41.774444580078125, 16.791778564453125, 33.707000732421875, 4.148002624511719, 66.93927001953125, 25.922042846679688, 48.56758117675781, 84.02392578125, 12.483634948730469, 47.1442756652832, 32.1825065612793, 13.0584716796875, 15.583759307861328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000354.npy"} +{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 33.53764343261719, "std": 29.216278076171875, "min": -6.269006729125977, "p10": 5.371367645263673, "median": 23.807796478271484, "p90": 85.78272781372073, "max": 109.44403076171875, "pos_frac": 0.921875, "sample": [8.259056091308594, 108.544921875, 11.385536193847656, 94.93014526367188, 7.1675262451171875, 33.80738067626953, 34.920738220214844, 81.42091369628906, -0.6379241943359375, 62.79005432128906, 23.917160034179688, 91.78240966796875, -1.6971054077148438, 6.196008682250977, 40.53831481933594, 23.69843292236328, 24.640174865722656, 109.44403076171875, 14.263845443725586, 52.54814147949219, 38.71099853515625, 18.889015197753906, 20.645652770996094, 10.002208709716797, 20.146724700927734, 46.273162841796875, 87.6520767211914, 12.703834533691406, 7.001335144042969, 15.704063415527344, 35.19267654418945, 59.890777587890625, 8.820693969726562, -1.9248199462890625, 37.533512115478516, 5.017950057983398, 62.36311340332031, 93.41938781738281, 30.02923583984375, 104.2304458618164, 16.729270935058594, 42.54022979736328, 3.528308868408203, 10.354339599609375, 51.61359405517578, 61.469825744628906, 23.669164657592773, 43.376739501953125, 12.664302825927734, 39.333335876464844, 22.192790985107422, 22.202590942382812, 53.79499816894531, 26.242164611816406, -6.269006729125977, 18.720382690429688, 18.564666748046875, 15.67596435546875, 22.51589584350586, -2.904865264892578, 25.5660400390625, 19.33411407470703, 36.612449645996094, 28.660018920898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000355.npy"} +{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 22.78453826904297, "std": 28.312274932861328, "min": -23.502723693847656, "p10": -6.611024093627929, "median": 13.863311767578125, "p90": 64.6887405395508, "max": 105.53865051269531, "pos_frac": 0.78125, "sample": [36.51915740966797, 8.062568664550781, 39.01640319824219, 19.54924774169922, -4.103244781494141, -0.6641483306884766, 3.8411331176757812, 45.851707458496094, 19.408889770507812, 19.53997802734375, 19.401317596435547, 10.976682662963867, 84.25395202636719, 105.53865051269531, 8.291107177734375, 36.30424880981445, -3.6083946228027344, 2.2262725830078125, -10.832923889160156, -3.3549652099609375, -7.709175109863281, 70.99349212646484, 4.2809295654296875, 14.599273681640625, 38.89509582519531, 46.54853057861328, 13.127349853515625, 94.75796508789062, 5.242095947265625, 26.203290939331055, -5.423255920410156, 34.41032409667969, 12.930999755859375, 18.41058349609375, 43.42463684082031, 4.422191619873047, 52.27471160888672, 73.32984161376953, -18.805513381958008, 14.776016235351562, 37.62997817993164, -1.68292236328125, 8.914249420166016, 57.11656188964844, 6.610540390014648, 3.7933425903320312, 38.405059814453125, -7.120067596435547, 67.9339599609375, -14.764163970947266, 16.632766723632812, -1.82354736328125, -7.7833709716796875, 2.1405792236328125, 10.21513557434082, 11.523033142089844, 11.303977966308594, 30.79082489013672, -23.502723693847656, 53.266632080078125, 74.85772705078125, 53.51606750488281, 12.177558898925781, 45.152305603027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000356.npy"} +{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 31.656475067138672, "std": 24.30622100830078, "min": -16.50714111328125, "p10": 3.7257846832275394, "median": 27.9426908493042, "p90": 62.3351318359375, "max": 103.48207092285156, "pos_frac": 0.921875, "sample": [47.675071716308594, 80.26419067382812, 14.826568603515625, 30.66057586669922, 57.65071105957031, 18.01629638671875, 46.03681945800781, 37.92878723144531, 8.588768005371094, 62.483795166015625, 40.59864807128906, 7.9143218994140625, 13.672904968261719, 35.564422607421875, 11.415435791015625, 20.114479064941406, 31.65862274169922, 22.833412170410156, 61.40916442871094, -14.009811401367188, 72.82292175292969, 22.85474395751953, 21.360870361328125, 43.22796630859375, -16.50714111328125, -6.440240859985352, 38.64543151855469, 26.29369354248047, -1.1346092224121094, 61.988250732421875, 61.811004638671875, 18.110427856445312, 39.2869873046875, 10.068199157714844, 33.79133605957031, 1.539022445678711, 23.74390411376953, 103.48207092285156, 40.4505615234375, 77.9210205078125, 31.33167266845703, 3.6156959533691406, 54.40039825439453, 33.98289489746094, 20.100448608398438, 13.566535949707031, 22.903213500976562, 26.55170440673828, 27.837230682373047, 53.71117401123047, 28.04815101623535, 55.3485107421875, 16.774154663085938, 14.01629638671875, 24.31735610961914, 21.94406509399414, 3.9826583862304688, 66.42994689941406, 29.243919372558594, 43.73240661621094, 10.7154541015625, 82.90982055664062, -1.4394683837890625, 33.37043762207031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000357.npy"} +{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 29.16506004333496, "std": 27.73453140258789, "min": -16.76531982421875, "p10": 3.1565925598144533, "median": 21.16253089904785, "p90": 64.41500778198244, "max": 136.02865600585938, "pos_frac": 0.953125, "sample": [14.812507629394531, 85.35151672363281, 11.074615478515625, 65.65730285644531, 21.74135971069336, 21.35173797607422, 8.148521423339844, 38.023529052734375, 20.384109497070312, 22.172815322875977, 17.60138702392578, 20.145782470703125, 10.00482177734375, 4.1477508544921875, 2.8076820373535156, 60.80914306640625, 4.2230224609375, 9.200180053710938, 41.6064453125, 8.31976318359375, -8.065786361694336, 3.84271240234375, 0.4708404541015625, 15.457813262939453, 20.973323822021484, -16.76531982421875, 46.843780517578125, -1.468414306640625, 67.20028686523438, 80.7544937133789, 34.16926574707031, 44.20089340209961, 44.25614929199219, 3.0763168334960938, 52.397125244140625, 52.69476318359375, 35.211090087890625, 23.194286346435547, 1.1639575958251953, 30.00677490234375, 47.422271728515625, 70.16838073730469, 5.169471740722656, 51.75061798095703, 41.08746337890625, 3.343902587890625, 48.495391845703125, 56.677703857421875, 8.190399169921875, 12.976612091064453, 32.405364990234375, 13.063896179199219, 7.491668701171875, 7.963884353637695, 89.27313232421875, 136.02865600585938, 7.518011093139648, 61.516319274902344, 29.960830688476562, 16.351409912109375, 53.16677474975586, 9.911582946777344, 35.16569519042969, 4.266008377075195], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000358.npy"} +{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 32.468040466308594, "std": 28.898197174072266, "min": -21.433868408203125, "p10": 0.9169551849365258, "median": 26.37386703491211, "p90": 78.59049682617187, "max": 111.612548828125, "pos_frac": 0.890625, "sample": [-19.326324462890625, 12.871345520019531, 13.216659545898438, 79.98713684082031, -21.433868408203125, 111.612548828125, 4.894989013671875, 78.54132080078125, 25.919349670410156, -11.837993621826172, 24.821014404296875, 6.140296936035156, 30.84234619140625, 58.53763198852539, 12.270811080932617, 25.498382568359375, 35.894813537597656, 10.980804443359375, 26.828384399414062, 52.83196258544922, 38.136749267578125, 23.238033294677734, 82.4369888305664, 28.82408332824707, 3.231830596923828, 28.64767074584961, 18.3051815032959, 31.778846740722656, 31.05462646484375, 14.34930419921875, 63.38201904296875, 51.36953353881836, 107.64772033691406, 25.789962768554688, 22.416763305664062, 41.449440002441406, 33.80097198486328, 47.426361083984375, -2.7488555908203125, 83.63115692138672, 10.873144149780273, 25.85223388671875, 56.38136291503906, 55.43571472167969, 34.97761535644531, 16.96878433227539, 47.82695770263672, -1.3889083862304688, 23.43970489501953, 92.896240234375, -0.07513427734375, -9.336288452148438, 36.98243713378906, 9.900390625, 16.222667694091797, 58.46873474121094, 33.207618713378906, 23.881778717041016, 78.611572265625, 51.963462829589844, 12.293289184570312, 49.858665466308594, 8.130407333374023, 11.322380065917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000359.npy"} +{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 29.71305274963379, "std": 23.531862258911133, "min": -20.753082275390625, "p10": 5.0232990264892585, "median": 24.8002872467041, "p90": 56.39331817626954, "max": 95.16033935546875, "pos_frac": 0.953125, "sample": [4.830169677734375, 39.234130859375, 25.188697814941406, 18.530426025390625, -9.540491104125977, 62.05389404296875, 16.99237823486328, 46.46466827392578, 40.59130096435547, 29.753982543945312, 1.647735595703125, 39.891937255859375, 18.495399475097656, 26.574886322021484, 45.515174865722656, 14.568929672241211, 9.367162704467773, -6.643028259277344, 42.760894775390625, 16.564727783203125, 72.41969299316406, 31.292993545532227, 17.698640823364258, 42.013916015625, 21.576068878173828, 51.290435791015625, 45.89900207519531, 24.45166778564453, 36.686710357666016, 80.81108856201172, 95.16033935546875, 52.29573059082031, 9.943016052246094, -20.753082275390625, 11.405920028686523, 26.729812622070312, 25.148906707763672, 14.950410842895508, 52.60090637207031, 13.976722717285156, 7.913171768188477, 89.54353332519531, 9.193748474121094, 24.227794647216797, 34.73469543457031, 5.5550384521484375, 5.473934173583984, 22.04400634765625, 56.93402862548828, 24.18017578125, 0.5147857666015625, 50.169891357421875, 0.21121788024902344, 75.91110229492188, 55.13166046142578, 18.279369354248047, 48.03662872314453, 24.27051544189453, 17.23244285583496, 34.626678466796875, 10.730255126953125, 26.0339412689209, 19.13518524169922, 53.109718322753906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000360.npy"} +{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 28.376157760620117, "std": 25.25600814819336, "min": -18.446258544921875, "p10": 2.174681282043457, "median": 24.963726043701172, "p90": 61.204568481445314, "max": 94.2508773803711, "pos_frac": 0.90625, "sample": [11.15452766418457, 29.362449645996094, 28.358524322509766, 15.974365234375, 9.557243347167969, 23.282732009887695, 34.104331970214844, 30.857091903686523, 93.75801086425781, 27.726318359375, 17.956954956054688, 30.676116943359375, -18.446258544921875, 53.425811767578125, 9.578659057617188, -12.962629318237305, 27.40666389465332, 61.02973937988281, 2.764493942260742, 13.401762008666992, 19.13379669189453, -4.452606201171875, 36.936729431152344, 14.83523178100586, 48.3985595703125, 71.08941650390625, 25.217864990234375, 56.446685791015625, 8.565643310546875, 23.462257385253906, 5.917449951171875, 4.615705490112305, 12.746635437011719, 2.8797378540039062, 72.15229034423828, 50.42926025390625, 6.145210266113281, 61.27949523925781, 94.2508773803711, 18.503448486328125, 49.926849365234375, 56.96149826049805, 79.34115600585938, 42.62201690673828, 6.783668518066406, 39.95410919189453, 42.99768829345703, 36.19049072265625, 25.103057861328125, 2.1152801513671875, 18.88853645324707, 15.60177993774414, 50.01384735107422, 2.313283920288086, 15.475631713867188, 33.12094497680664, 43.0653076171875, -2.725055694580078, 34.2972412109375, 24.82439422607422, 10.275054931640625, -7.484138488769531, -0.2952880859375, 79.18621063232422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000361.npy"} +{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 33.91130828857422, "std": 27.831872940063477, "min": -17.039688110351562, "p10": 1.8575315475463896, "median": 30.803786277770996, "p90": 72.03871688842774, "max": 103.3932876586914, "pos_frac": 0.90625, "sample": [19.105396270751953, 72.78730010986328, 64.51485443115234, 66.95848846435547, 0.65191650390625, 4.791233062744141, 102.69601440429688, 46.62689971923828, 17.358226776123047, 44.681121826171875, 43.65838623046875, 13.703939437866211, 20.78374481201172, 52.38160705566406, 45.48615264892578, 45.243011474609375, 10.102838516235352, -1.8797931671142578, 44.77399444580078, 33.81256103515625, 18.14476776123047, 15.914276123046875, 13.766220092773438, 8.686492919921875, -17.039688110351562, -0.111785888671875, 28.030868530273438, 62.47285461425781, 4.670633316040039, 103.3932876586914, 77.21343231201172, 35.31709289550781, 7.0967254638671875, 26.68475341796875, 14.935688018798828, 29.961103439331055, 85.93232727050781, -8.765758514404297, 31.646469116210938, 8.25067138671875, 40.66327667236328, 8.59263801574707, 79.35842895507812, 88.92340087890625, 20.725425720214844, 39.706153869628906, 22.090320587158203, 35.9813117980957, 27.770309448242188, 70.29202270507812, 19.835670471191406, 64.808349609375, 32.620567321777344, 35.77143859863281, 17.980018615722656, 63.356224060058594, 54.522796630859375, -12.45074462890625, 46.78718566894531, 21.596038818359375, 55.652183532714844, 37.19273376464844, -2.5680465698242188, 6.6778564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000362.npy"} +{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 39.09236145019531, "std": 32.99383544921875, "min": -25.24774169921875, "p10": 6.662544250488282, "median": 30.78058433532715, "p90": 82.38440856933593, "max": 136.0556182861328, "pos_frac": 0.921875, "sample": [-7.625511169433594, 29.94881820678711, 86.52278900146484, 32.6065559387207, -9.41348648071289, 21.973846435546875, 52.105079650878906, 71.37445831298828, 11.870651245117188, 16.341699600219727, 37.914249420166016, 6.2176666259765625, 71.49728393554688, 30.88162612915039, 16.400466918945312, 111.15762329101562, -5.452920913696289, 43.93487548828125, 15.737764358520508, 82.45164489746094, 40.42761993408203, 60.20073699951172, 40.577911376953125, 81.19019317626953, 12.295503616333008, 45.78284454345703, 101.24642944335938, 42.120025634765625, 7.700592041015625, 69.1160888671875, 20.405540466308594, 39.95429229736328, 20.99713134765625, 8.41440200805664, 67.6571044921875, 56.11024475097656, 18.647523880004883, 25.822507858276367, 96.21058654785156, 89.8672103881836, 74.95846557617188, 17.47887420654297, 4.187397003173828, 76.75212097167969, -7.918718338012695, 24.189529418945312, 49.435768127441406, -25.24774169921875, 26.747055053710938, 82.01399230957031, 22.185264587402344, 9.54952621459961, 12.449790954589844, 42.47746276855469, 82.22752380371094, 11.644920349121094, 17.50102996826172, 14.034637451171875, 8.104747772216797, 27.661026000976562, 136.0556182861328, 41.394683837890625, 30.679542541503906, 62.15892028808594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000363.npy"} +{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 28.14519691467285, "std": 26.875614166259766, "min": -16.416404724121094, "p10": 0.5948112487792986, "median": 22.78134536743164, "p90": 64.51367492675782, "max": 109.86663818359375, "pos_frac": 0.890625, "sample": [15.912740707397461, 39.986061096191406, -13.440353393554688, 3.434030532836914, 3.0102157592773438, 51.31538391113281, -9.099533081054688, 31.08734130859375, 51.546356201171875, 100.69587707519531, 9.164020538330078, 68.11975860595703, 54.508209228515625, 22.815383911132812, 2.2671432495117188, 11.638046264648438, 9.812423706054688, 3.6317672729492188, 48.45440673828125, -8.9290771484375, 34.1547966003418, 28.35049057006836, 43.70948028564453, 9.996219635009766, 16.996788024902344, 15.40629768371582, 14.857616424560547, 75.00103759765625, 28.826473236083984, 30.485353469848633, 57.57599639892578, 65.29391479492188, 15.507469177246094, 37.45849609375, 16.403377532958984, 28.334136962890625, 24.62591552734375, 7.727375030517578, 57.92481231689453, -0.1219024658203125, 2.9997940063476562, 95.44557189941406, 22.74730682373047, 29.985563278198242, 18.540786743164062, -16.416404724121094, 41.208465576171875, 25.934066772460938, 46.25812530517578, 8.6070556640625, 17.595531463623047, 44.495147705078125, -5.322004318237305, 23.660728454589844, 109.86663818359375, 12.286453247070312, 21.5384521484375, 66.35963439941406, -0.20252037048339844, 62.693115234375, 13.666557312011719, 21.424072265625, 27.541885375976562, 5.934234619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000364.npy"} +{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 27.50690460205078, "std": 26.08824348449707, "min": -5.658348083496094, "p10": 3.131523132324219, "median": 19.568644523620605, "p90": 58.29841003417969, "max": 128.43862915039062, "pos_frac": 0.90625, "sample": [53.93779754638672, -5.658348083496094, 7.688468933105469, 5.789707183837891, 7.729269027709961, 11.025203704833984, 46.33349609375, 3.9732894897460938, 14.518112182617188, 7.822540283203125, 43.38429260253906, 41.79015350341797, -2.915803909301758, 58.812835693359375, 3.7489471435546875, 8.9847412109375, 40.783843994140625, 70.67962646484375, 48.12724304199219, 53.176292419433594, 20.250473022460938, 15.10954475402832, 26.589431762695312, 2.866912841796875, 11.82094955444336, 10.155525207519531, 18.9442081451416, 27.18777084350586, 42.228755950927734, -5.589946746826172, 9.177810668945312, -1.9217338562011719, 24.202720642089844, 20.458637237548828, 30.010177612304688, 53.386192321777344, 10.297832489013672, 20.19308090209961, 5.1278839111328125, 4.37451171875, 21.971694946289062, 9.437606811523438, 71.8671875, 4.656946182250977, 15.061933517456055, 77.63752746582031, 40.92424392700195, 18.072891235351562, 24.442184448242188, 11.511520385742188, 41.56005859375, 56.64197540283203, -1.1872367858886719, 37.597068786621094, 16.73623275756836, 27.712642669677734, 128.43862915039062, 87.78048706054688, 12.957847595214844, -2.4200782775878906, 64.87901306152344, 57.09808349609375, 16.049835205078125, 56.41107940673828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000365.npy"} +{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 29.27224349975586, "std": 23.412845611572266, "min": -7.074731826782227, "p10": 3.2916528701782237, "median": 23.773966789245605, "p90": 67.7847999572754, "max": 89.33551025390625, "pos_frac": 0.953125, "sample": [14.387046813964844, 23.29311180114746, 49.82225799560547, 12.088119506835938, 17.66717529296875, 6.241230010986328, 9.892265319824219, 17.516460418701172, 25.743484497070312, 89.33551025390625, 13.298627853393555, 5.815338134765625, 66.32880401611328, 69.87577819824219, 37.616275787353516, 26.30011749267578, 20.481414794921875, 0.7433605194091797, -7.074731826782227, 9.769214630126953, 43.19289779663086, 15.43975830078125, 73.14590454101562, -3.6774463653564453, 68.40879821777344, 74.3461685180664, 42.37000274658203, 16.313278198242188, 41.846771240234375, 24.25482177734375, 20.484464645385742, 2.83807373046875, 5.516761779785156, 15.576057434082031, 45.2884521484375, 1.9948959350585938, 79.76673889160156, 47.007789611816406, 26.43572998046875, 20.292150497436523, 32.02366638183594, 35.00257110595703, 13.124090194702148, 0.6578655242919922, 26.386150360107422, 11.051706314086914, 60.986602783203125, -5.968929290771484, 37.994659423828125, 71.72183227539062, 60.21693420410156, 22.51660919189453, 46.38819885253906, 15.808242797851562, 59.18450927734375, 32.438568115234375, 4.350004196166992, 39.747100830078125, 6.729213714599609, 15.558197021484375, 30.222536087036133, 29.529451370239258, 43.890289306640625, 13.880531311035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000366.npy"} +{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 25.49578094482422, "std": 22.78716278076172, "min": -16.36359405517578, "p10": 1.581372070312502, "median": 21.96495819091797, "p90": 53.31651763916016, "max": 100.38272094726562, "pos_frac": 0.90625, "sample": [3.554859161376953, 9.706878662109375, 49.538902282714844, 7.756618499755859, 98.96357727050781, 20.218149185180664, 18.220657348632812, 36.6668815612793, 58.14189147949219, 4.6348724365234375, 25.79376220703125, -0.5747013092041016, 6.7929229736328125, 33.714447021484375, 31.281742095947266, 51.043739318847656, 19.711441040039062, 8.3956298828125, 39.60304260253906, 0.7355918884277344, 28.413070678710938, 51.85112762451172, 19.460166931152344, 14.496452331542969, 17.652923583984375, 36.22486877441406, 54.37632751464844, 17.973268508911133, 50.02832794189453, 7.148406982421875, 45.879600524902344, 21.42230224609375, 24.876541137695312, 27.127952575683594, 29.681991577148438, 18.14281463623047, 46.75511932373047, 28.476343154907227, 61.14869689941406, 16.684528350830078, 23.46587562561035, 6.340545654296875, -9.556236267089844, 16.161235809326172, 20.82232666015625, -6.778339385986328, -13.861785888671875, 34.000274658203125, 53.944541931152344, 22.507614135742188, 13.839996337890625, -13.723419189453125, 35.64936828613281, 18.920440673828125, 26.42751693725586, 36.23145294189453, 11.363361358642578, 5.373504638671875, 56.01271057128906, 26.691253662109375, 8.078872680664062, -16.36359405517578, 100.38272094726562, 34.07807159423828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000367.npy"} +{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 30.921649932861328, "std": 24.50312042236328, "min": -7.295450210571289, "p10": 5.002363395690918, "median": 28.262661933898926, "p90": 59.38735656738282, "max": 111.9737548828125, "pos_frac": 0.9375, "sample": [30.041566848754883, -5.949703216552734, 32.02101516723633, 18.378494262695312, 39.430450439453125, 47.80039978027344, 7.849964141845703, -4.523529052734375, 60.02703094482422, 19.936126708984375, 40.369449615478516, 35.22428894042969, 66.21086120605469, 6.2208251953125, 31.407825469970703, 29.254825592041016, 28.019508361816406, 24.279327392578125, 6.697532653808594, 28.481002807617188, 57.89478302001953, 42.19860076904297, 26.007293701171875, 36.8953857421875, 81.37664794921875, 6.940704345703125, 16.81249237060547, -1.6924495697021484, 8.310380935668945, -7.295450210571289, 51.43663024902344, 2.824554443359375, 30.786827087402344, 15.267852783203125, 1.5644989013671875, 19.566078186035156, 42.324920654296875, 28.044321060180664, 22.979251861572266, 18.70384979248047, 12.143255233764648, 82.40228271484375, 26.04241180419922, 48.16851806640625, 33.80989074707031, 42.90283966064453, 111.56769561767578, 16.953643798828125, 4.948246002197266, 10.975494384765625, 31.85192108154297, 64.1006851196289, 34.07316970825195, 40.68836212158203, 55.62331771850586, 5.128637313842773, 25.357276916503906, 111.9737548828125, 46.138084411621094, 41.79011154174805, 25.29570770263672, 20.799591064453125, 30.54568862915039, 13.58050537109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000368.npy"} +{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 25.208810806274414, "std": 22.390411376953125, "min": -21.951133728027344, "p10": -1.8395601272582998, "median": 23.230974197387695, "p90": 55.85286407470704, "max": 81.6043701171875, "pos_frac": 0.859375, "sample": [2.5762939453125, 8.389242172241211, 81.6043701171875, 10.847883224487305, -6.916481018066406, 9.523843765258789, 10.363521575927734, 51.64479446411133, 43.43490982055664, 68.991943359375, 40.601871490478516, 4.1589508056640625, 8.6617431640625, 11.801506042480469, 28.392303466796875, 56.69672393798828, 25.354019165039062, 48.960147857666016, -0.3505744934082031, 23.723587036132812, 37.44780731201172, 22.991470336914062, 20.232025146484375, 13.873046875, 42.456085205078125, -6.72467041015625, 2.0375595092773438, 12.297264099121094, 14.679039001464844, 26.275367736816406, 45.99998474121094, 51.90290069580078, 53.88385772705078, -21.951133728027344, 12.896453857421875, 35.27366256713867, 47.98780822753906, 35.246925354003906, 42.028114318847656, 66.56719970703125, 58.38742446899414, -5.5197601318359375, 72.32223510742188, 16.83415985107422, 60.91740417480469, -6.520355224609375, 28.391510009765625, -5.489295959472656, -2.238534927368164, 38.99806213378906, 23.470478057861328, 22.75060272216797, 34.01702880859375, 23.501686096191406, 18.41126251220703, 12.991092681884766, 26.633316040039062, 11.710624694824219, 34.88360595703125, 17.403518676757812, 1.3875885009765625, -0.9086189270019531, 33.896602630615234, 13.270896911621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000369.npy"} +{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 24.929824829101562, "std": 22.07652473449707, "min": -8.30179214477539, "p10": -1.249482154846191, "median": 24.80476188659668, "p90": 56.0314811706543, "max": 88.1761245727539, "pos_frac": 0.84375, "sample": [24.463077545166016, 31.512672424316406, -0.7913055419921875, 6.6090240478515625, 88.1761245727539, 6.6258544921875, 27.172164916992188, 14.891578674316406, 1.2645626068115234, 20.239822387695312, 59.61151123046875, 19.41473388671875, 5.7885589599609375, 25.848495483398438, 15.803237915039062, 32.13691711425781, 43.532073974609375, 7.424686431884766, 27.35149383544922, 57.56254577636719, 49.655372619628906, -0.820159912109375, 60.42750930786133, 15.587699890136719, 25.146446228027344, 29.33121109008789, 24.358718872070312, -1.9532032012939453, 4.351402282714844, 30.20520782470703, 26.863624572753906, 7.086944580078125, 10.04330062866211, 48.702369689941406, -8.30179214477539, 29.775592803955078, 25.79108428955078, 39.50360107421875, 0.959747314453125, -1.4334774017333984, 42.855926513671875, 41.21293640136719, -4.111759185791016, 27.522747039794922, 55.06756591796875, 23.076732635498047, 16.180801391601562, 42.34852600097656, 41.3656120300293, -1.892059326171875, 1.7277679443359375, -5.2489471435546875, 35.8055419921875, 65.14761352539062, -0.0966339111328125, 10.581398010253906, 14.374137878417969, -3.6420059204101562, 1.2325172424316406, 79.72041320800781, 40.91602325439453, 47.075340270996094, 56.44458770751953, 37.924957275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000370.npy"} +{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 27.12832260131836, "std": 28.606050491333008, "min": -31.03631591796875, "p10": -0.13489761352538931, "median": 20.910491943359375, "p90": 57.38357696533203, "max": 122.3263931274414, "pos_frac": 0.890625, "sample": [57.01066589355469, 52.970970153808594, 17.079803466796875, 7.873357772827148, -31.03631591796875, 1.2091064453125, 122.3263931274414, 19.614110946655273, 56.56268310546875, 47.46638488769531, 6.6015777587890625, 7.119634628295898, 31.474319458007812, 35.06859588623047, -8.13690185546875, 15.881515502929688, 29.016944885253906, 12.28373908996582, 73.44371032714844, 31.80748748779297, 5.346590042114258, 16.263587951660156, 25.898239135742188, -16.795242309570312, -5.940399169921875, 49.81414031982422, 86.36447143554688, 39.192169189453125, 30.00774383544922, 19.317758560180664, 119.3509750366211, 10.586318969726562, 21.609832763671875, 35.99171447753906, -0.7108993530273438, 11.631195068359375, 30.992828369140625, 14.336158752441406, 9.765689849853516, 58.87687683105469, -16.180448532104492, 22.981300354003906, 56.479339599609375, 20.09495735168457, 5.943946838378906, 53.287086486816406, 25.88146209716797, 2.639923095703125, 55.85234069824219, 17.363372802734375, 26.846649169921875, 3.6758041381835938, 36.48362350463867, 20.211151123046875, 37.193363189697266, 4.428619384765625, 29.746440887451172, 4.241209030151367, -3.154815673828125, 59.722198486328125, 57.54339599609375, 56.209686279296875, 3.9907684326171875, 7.193675994873047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000371.npy"} +{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 31.275503158569336, "std": 29.992393493652344, "min": -19.61409568786621, "p10": 4.132645034790039, "median": 27.652746200561523, "p90": 69.65729370117188, "max": 135.39608764648438, "pos_frac": 0.90625, "sample": [29.2171630859375, 49.70752716064453, 96.47564697265625, 25.412734985351562, 4.5763092041015625, 38.276214599609375, 62.112361907958984, 39.3125, 13.541885375976562, 9.277572631835938, 69.19659423828125, 28.986572265625, 9.44537353515625, 57.22441101074219, 5.6750030517578125, 17.01398468017578, 4.056859970092773, 26.318920135498047, 51.591949462890625, -19.61409568786621, -8.25579833984375, 38.416378021240234, 76.60188293457031, 16.54473114013672, 98.43962860107422, 34.143882751464844, 17.880035400390625, 24.880577087402344, 6.8389739990234375, 30.207889556884766, 8.3909912109375, 17.182327270507812, 10.424461364746094, 14.168731689453125, 43.51213073730469, -0.16347312927246094, 70.57707977294922, 43.42436981201172, 10.984670639038086, 29.484840393066406, 19.848724365234375, 43.831947326660156, 31.74831771850586, -18.470428466796875, 51.76011276245117, 69.854736328125, 46.61468505859375, 42.15416717529297, -5.2023773193359375, 30.385826110839844, 8.13385009765625, 57.039947509765625, 42.24055480957031, 11.678060531616211, 101.29817199707031, 4.400720596313477, 19.95387077331543, 29.338043212890625, 4.309476852416992, 135.39608764648438, -13.437154769897461, 11.089118957519531, 63.99913024902344, 12.176856994628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000372.npy"} +{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 32.979225158691406, "std": 26.324975967407227, "min": -15.974594116210938, "p10": 5.735283946990967, "median": 33.9997673034668, "p90": 68.68919525146485, "max": 119.2515869140625, "pos_frac": 0.953125, "sample": [1.4300537109375, 7.053707122802734, 6.483757019042969, 20.68431854248047, 17.25653648376465, 77.83971405029297, 45.43951416015625, 29.25177001953125, 25.178192138671875, 24.487518310546875, 50.22695541381836, 34.428382873535156, 12.08270263671875, 6.286460876464844, 33.08445739746094, 41.46868133544922, 5.563900947570801, 73.68760681152344, 41.154335021972656, 0.071685791015625, 2.9240798950195312, 43.888214111328125, 43.90089416503906, -14.959503173828125, 19.691810607910156, 7.836328506469727, 30.249027252197266, 41.01576232910156, 14.04473876953125, 12.272258758544922, 8.656867980957031, 61.109893798828125, 34.53313446044922, 8.617584228515625, 76.3611068725586, 6.1351776123046875, 41.07006072998047, 76.60484313964844, 11.82600212097168, 10.083709716796875, 53.48765563964844, 39.69426727294922, 51.28845977783203, 80.08770751953125, 69.53501892089844, 41.46836853027344, 41.53142547607422, 119.2515869140625, 63.385459899902344, 40.953826904296875, -11.649566650390625, 66.71560668945312, 14.474090576171875, 36.662071228027344, 8.232536315917969, 52.598167419433594, 54.54359436035156, 12.813074111938477, 44.98627471923828, 33.57115173339844, 54.998565673828125, 45.51830291748047, -15.974594116210938, 23.475013732910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000373.npy"} +{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 29.80048942565918, "std": 25.573938369750977, "min": -10.293472290039062, "p10": 2.551044845581056, "median": 25.200302124023438, "p90": 68.3887809753418, "max": 98.57064056396484, "pos_frac": 0.9375, "sample": [14.249130249023438, 25.256492614746094, 8.222152709960938, 80.37310791015625, 27.479324340820312, 50.09088134765625, 8.918075561523438, 36.79203796386719, 18.583465576171875, 2.053607940673828, 11.327346801757812, 1.4392318725585938, 22.688491821289062, 1.2692070007324219, 29.412872314453125, 8.132926940917969, 45.95409393310547, 28.9466552734375, 65.75935363769531, 10.767353057861328, 34.481597900390625, 18.22634506225586, 84.5960922241211, 21.93902587890625, -0.5776939392089844, 86.07693481445312, 21.475099563598633, 28.925411224365234, -6.31005859375, 26.16046905517578, 56.438232421875, 44.25038146972656, 7.1015625, 71.67393493652344, -9.534576416015625, 60.68998718261719, 4.24755859375, 56.488250732421875, 9.864889144897461, 5.43878173828125, 14.245121002197266, 33.498085021972656, 15.643535614013672, 14.492563247680664, 7.322187423706055, 25.14411163330078, 33.61668395996094, 98.57064056396484, 35.62657165527344, 31.855239868164062, 4.7896270751953125, 77.10850524902344, 15.142532348632812, 60.60385513305664, 35.56095886230469, 20.08953857421875, -10.293472290039062, 55.339210510253906, 41.470603942871094, 17.495840072631836, 55.74018859863281, 3.71173095703125, 31.57379913330078, 69.51567840576172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000374.npy"} +{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 29.460506439208984, "std": 25.48676109313965, "min": -15.105438232421875, "p10": -1.250460052490234, "median": 26.082965850830078, "p90": 63.993035888671876, "max": 85.49844360351562, "pos_frac": 0.859375, "sample": [68.7127914428711, 45.65232467651367, 3.34381103515625, 24.29412841796875, 27.196617126464844, 2.5595130920410156, 64.01866149902344, 17.967330932617188, -1.3378105163574219, 9.834980010986328, 7.662618637084961, 41.35746765136719, 45.83842468261719, 1.6017532348632812, -4.210411071777344, 49.3707275390625, 56.394142150878906, 15.57371711730957, 53.95439910888672, 24.969314575195312, 13.921844482421875, 47.04180908203125, -3.907867431640625, 30.94115447998047, 10.274112701416016, 11.437118530273438, 77.04570007324219, -6.263500213623047, 3.972320556640625, 44.66160583496094, 28.608245849609375, 15.98626708984375, 60.80168151855469, 56.916229248046875, -1.0466423034667969, 39.12333679199219, 63.93324279785156, 35.52314758300781, 17.960403442382812, 72.82694244384766, 17.732582092285156, 16.090051651000977, 15.677452087402344, -15.105438232421875, 62.26679992675781, 56.09258270263672, 57.310340881347656, -6.723670959472656, 85.49844360351562, 64.96241760253906, 41.083099365234375, -9.657154083251953, -0.483306884765625, 65.04054260253906, 31.827346801757812, 60.77239227294922, 3.279052734375, 45.34300994873047, 20.347251892089844, 4.628589630126953, 19.40142822265625, 32.802635192871094, 37.57896041870117, 5.195331573486328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000375.npy"} +{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 32.31665802001953, "std": 27.68313217163086, "min": -17.767858505249023, "p10": 4.782272529602051, "median": 25.43633270263672, "p90": 78.7999481201172, "max": 108.45077514648438, "pos_frac": 0.953125, "sample": [30.78870391845703, 91.017333984375, 15.084091186523438, 18.57904052734375, 94.41456604003906, 31.848915100097656, 76.53936767578125, -5.45707893371582, 14.278335571289062, 7.4388580322265625, 34.202667236328125, 3.3204479217529297, 57.79142761230469, 8.06658935546875, 2.8897628784179688, 42.174560546875, 34.785911560058594, 4.9830780029296875, 26.278663635253906, 19.183774948120117, 15.234130859375, 4.979375839233398, 12.56024169921875, 79.76876831054688, 64.04920959472656, 18.013656616210938, 38.20903015136719, 41.26396179199219, 64.29618072509766, 10.331554412841797, 57.73289489746094, 98.76008605957031, 4.6977996826171875, 62.70771789550781, 48.31135559082031, 10.810050964355469, 42.14311981201172, 49.56492614746094, 27.318405151367188, 32.091007232666016, 24.514556884765625, 31.087421417236328, 18.893692016601562, 23.870758056640625, 29.15838623046875, 16.341785430908203, 108.45077514648438, 25.007614135742188, -4.581203460693359, 1.6901626586914062, 23.30120849609375, -17.767858505249023, 80.04469299316406, 25.86505126953125, 16.463485717773438, 80.43817138671875, 21.932594299316406, 40.860191345214844, 14.012739181518555, 5.353424072265625, 27.792692184448242, 22.892772674560547, 52.74588394165039, 8.844505310058594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000376.npy"} +{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 29.981285095214844, "std": 27.36408042907715, "min": -19.292030334472656, "p10": -0.8770988464355468, "median": 27.909320831298828, "p90": 68.54915161132813, "max": 102.1872787475586, "pos_frac": 0.859375, "sample": [35.38666534423828, 64.89399719238281, 32.54766845703125, 14.701370239257812, 15.66533088684082, 7.442819595336914, 69.85061645507812, 19.727630615234375, 69.82215881347656, 3.5503158569335938, 70.29812622070312, 61.113677978515625, 19.569137573242188, 91.8385009765625, 11.620828628540039, 49.186920166015625, 29.970108032226562, 62.345977783203125, 29.78289031982422, 2.568521499633789, 42.22199249267578, 60.34675598144531, 8.969879150390625, -0.3541679382324219, 69.43756103515625, -0.79083251953125, 53.01043701171875, -3.7694854736328125, 66.4761962890625, 7.680671691894531, 29.90346908569336, 22.127870559692383, 10.387931823730469, 20.67388916015625, 4.300563812255859, 44.058998107910156, 51.35148620605469, 18.802207946777344, 35.869720458984375, 51.80436706542969, 32.89330291748047, -0.9250564575195312, -19.292030334472656, 102.1872787475586, 72.6817398071289, 15.734285354614258, 32.842437744140625, -12.344955444335938, 15.22348403930664, 65.62519836425781, 26.035751342773438, 62.50498962402344, 0.6925582885742188, 0.65338134765625, -0.9140701293945312, 31.34168243408203, 12.946357727050781, 12.664794921875, 36.67498779296875, -6.523859024047852, 41.26835632324219, 17.58979034423828, 37.485687255859375, -12.636665344238281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000377.npy"} +{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 31.929780960083008, "std": 25.704288482666016, "min": -18.05096435546875, "p10": 4.556723213195802, "median": 30.30886459350586, "p90": 66.19393386840821, "max": 106.23587036132812, "pos_frac": 0.921875, "sample": [36.090904235839844, 19.491060256958008, 94.9383544921875, 87.42268371582031, 52.65171813964844, 17.45719337463379, 18.324764251708984, 26.01441192626953, 19.277450561523438, 19.96255874633789, 36.29659652709961, 30.33477783203125, 9.799728393554688, 30.94500732421875, 65.3623046875, 3.9567718505859375, 6.028778076171875, 37.54441833496094, 20.425003051757812, 42.355018615722656, 18.577470779418945, -1.8303947448730469, 14.957115173339844, 34.417724609375, 14.597251892089844, 16.995012283325195, 15.489023208618164, 56.974205017089844, 26.853242874145508, 36.395263671875, 47.20026397705078, 41.17205810546875, 44.34119415283203, -16.407926559448242, -8.339813232421875, 66.55034637451172, 30.28295135498047, 91.8892822265625, 0.3084545135498047, 35.387176513671875, 32.308250427246094, 37.047115325927734, 24.327674865722656, 77.02899169921875, 8.533241271972656, -8.765096664428711, 62.78117370605469, 32.3831787109375, 106.23587036132812, 28.539640426635742, 26.861106872558594, 50.804908752441406, 31.084609985351562, 35.016937255859375, 36.95209503173828, 35.234519958496094, 27.419891357421875, 9.271064758300781, 15.372398376464844, 24.01367950439453, 50.82269287109375, -18.05096435546875, 5.956609725952148, 71.83695983886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000378.npy"} +{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 33.164710998535156, "std": 28.849937438964844, "min": -19.288570404052734, "p10": 1.7665946960449235, "median": 26.000293731689453, "p90": 76.71302490234378, "max": 115.73968505859375, "pos_frac": 0.921875, "sample": [63.13976287841797, 40.759185791015625, 62.616981506347656, 39.84925842285156, 12.284912109375, 8.578609466552734, 17.686325073242188, 69.91087341308594, 31.78515625, 24.575767517089844, 52.85936737060547, 49.386566162109375, 4.402587890625, 46.82159423828125, 64.21173095703125, 39.20353698730469, 17.456558227539062, 79.02536010742188, 13.342605590820312, 37.55963897705078, 46.09337615966797, 42.672420501708984, 18.56385040283203, 19.50729751586914, 30.423095703125, 17.499052047729492, 31.93499755859375, 10.788349151611328, 6.835849761962891, 0.64935302734375, 71.92301940917969, 32.184303283691406, 1.0992050170898438, 39.106475830078125, 56.549072265625, 78.76588439941406, 17.504440307617188, 64.04263305664062, 102.9554443359375, 11.732852935791016, 15.868997573852539, 85.20124816894531, 21.54422378540039, -2.892721176147461, 52.44715118408203, -4.826530456542969, 9.361785888671875, 27.34355926513672, 85.07217407226562, 5.734722137451172, -19.288570404052734, 19.604217529296875, 46.6270751953125, 24.657028198242188, 115.73968505859375, 3.3238372802734375, -6.63385009765625, 22.576248168945312, 15.39478874206543, 12.472591400146484, 36.775665283203125, 81.39291381835938, -8.295490264892578, 7.053428649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000379.npy"} +{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 34.69824981689453, "std": 30.75237464904785, "min": -20.6903076171875, "p10": -6.3120891571044915, "median": 33.50398635864258, "p90": 84.15966720581058, "max": 96.36842346191406, "pos_frac": 0.828125, "sample": [36.77935028076172, 30.127601623535156, -2.66107177734375, 54.77996826171875, 37.14306640625, 8.647525787353516, 15.576148986816406, 89.61991882324219, 32.42502212524414, 15.853523254394531, 13.070671081542969, 94.654052734375, -1.1408157348632812, 96.36842346191406, 90.03887939453125, 5.868480682373047, 15.971382141113281, 48.657012939453125, 61.82440185546875, 15.7677001953125, 59.865657806396484, -14.577285766601562, 11.29275894165039, 34.265525817871094, 22.707841873168945, -7.709461212158203, 61.84614562988281, 12.03424072265625, 61.73707580566406, 86.99866485595703, 39.23832702636719, 49.59512710571289, 45.520042419433594, 32.74244689941406, 77.53533935546875, 49.64788055419922, 37.28960418701172, 30.775894165039062, 50.422725677490234, 30.08545684814453, 89.87490844726562, -6.675655364990234, -12.544595718383789, -3.5836753845214844, -20.6903076171875, 44.080162048339844, 63.840179443359375, 40.304969787597656, 13.610992431640625, -14.939102172851562, 76.28779602050781, 7.322271347045898, 64.89537811279297, 25.188079833984375, 44.405975341796875, -5.463768005371094, 57.98272705078125, 21.801584243774414, 29.4882755279541, -14.9146728515625, 28.190994262695312, 38.94464111328125, 87.27117919921875, 35.32438659667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000380.npy"} +{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 34.22349548339844, "std": 30.587743759155273, "min": -29.2945556640625, "p10": 4.905857086181641, "median": 28.489730834960938, "p90": 71.69346618652344, "max": 106.49234008789062, "pos_frac": 0.90625, "sample": [5.485553741455078, 40.08418273925781, 12.517173767089844, 88.63264465332031, 67.167236328125, 56.198089599609375, 70.08819580078125, 13.097366333007812, 26.58045768737793, 64.53768157958984, 56.66193389892578, 31.66552734375, 19.156116485595703, 39.68806457519531, 106.49234008789062, -13.664154052734375, 66.17426300048828, 47.30772399902344, 97.54512023925781, 72.38143920898438, 101.17710876464844, 64.62167358398438, 8.575063705444336, 5.531028747558594, -6.380558013916016, 36.501102447509766, 91.9439697265625, 24.61758804321289, -15.995826721191406, 35.65800476074219, 8.335714340209961, 52.12218475341797, 32.23017120361328, 25.620254516601562, 9.2965087890625, 5.182605743408203, 28.270164489746094, 31.402313232421875, 33.505069732666016, 9.154388427734375, 35.92595672607422, 27.009109497070312, 59.49969482421875, 64.68406677246094, 30.649940490722656, 28.70929718017578, -8.654022216796875, 22.428585052490234, 64.10888671875, 19.650501251220703, 22.736692428588867, 84.89679718017578, 18.06048583984375, 8.855438232421875, 4.787250518798828, -15.617919921875, 24.267105102539062, 60.47907257080078, 56.92195129394531, 11.219526290893555, 25.853214263916016, -29.2945556640625, 7.3427276611328125, 16.618553161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000381.npy"} +{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 35.52247619628906, "std": 28.128053665161133, "min": -26.462600708007812, "p10": 2.0245948791503907, "median": 31.464111328125, "p90": 70.64026641845703, "max": 121.1675796508789, "pos_frac": 0.90625, "sample": [121.1675796508789, 41.5135498046875, 21.635019302368164, 24.646453857421875, 15.178705215454102, 83.15675354003906, 1.9417190551757812, 13.667381286621094, 67.16828918457031, 12.573192596435547, 46.59938049316406, 59.36024475097656, 56.225013732910156, 21.66143798828125, 31.4892578125, 44.88352966308594, 50.33984375, -3.3442306518554688, 49.60401153564453, 37.314300537109375, 35.35398864746094, 28.814788818359375, 18.358444213867188, 27.573497772216797, 29.165969848632812, -0.4141998291015625, 31.603029251098633, 27.368804931640625, 15.11041259765625, 12.662918090820312, 72.60133361816406, -3.7425594329833984, -7.725971221923828, 71.0333251953125, 67.15254211425781, -26.462600708007812, 51.63037109375, 41.21087646484375, 8.85174560546875, 72.69330596923828, 9.123283386230469, 31.43896484375, 69.72312927246094, -6.02342414855957, 24.485702514648438, 15.274436950683594, 56.54669189453125, 67.88784790039062, 21.617431640625, 63.82066345214844, 98.2291030883789, 95.4526596069336, 36.48772430419922, 13.271631240844727, 34.766883850097656, 41.43177795410156, 13.573780059814453, 32.07939529418945, 39.57929992675781, 67.76412963867188, 17.3049373626709, 28.58074188232422, 2.2179718017578125, 29.1822509765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000382.npy"} +{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 32.29367446899414, "std": 25.84360122680664, "min": -10.210609436035156, "p10": 3.504080963134766, "median": 28.656344413757324, "p90": 60.560729598999025, "max": 118.09507751464844, "pos_frac": 0.9375, "sample": [31.258056640625, 12.089302062988281, 35.0657958984375, -7.551307678222656, 36.99517059326172, 9.071897506713867, -10.210609436035156, 95.13917541503906, 49.81453323364258, 4.540531158447266, 59.43473815917969, 20.677379608154297, 6.166004180908203, 15.990211486816406, -0.6606597900390625, 25.726707458496094, 118.09507751464844, 3.4656906127929688, 39.260040283203125, 23.19472885131836, 60.32110595703125, 17.005393981933594, 21.96099853515625, 49.767364501953125, 35.96752166748047, -6.426368713378906, 84.87579345703125, 29.436281204223633, 9.017868041992188, 9.170150756835938, 86.01311492919922, 11.166498184204102, 37.42559814453125, 38.30158996582031, 42.12560272216797, 21.313262939453125, 55.32160186767578, 21.009265899658203, 60.66342544555664, 49.108238220214844, 8.198577880859375, 5.0513458251953125, 27.876407623291016, 43.93852615356445, 53.158958435058594, 26.129150390625, 3.593658447265625, 74.11213684082031, 23.043624877929688, 40.99272918701172, 64.09654235839844, 36.67255401611328, 3.274993896484375, 39.62091064453125, 46.688743591308594, 37.04084014892578, 29.816253662109375, 58.84928894042969, 23.645793914794922, 23.5369873046875, 9.443618774414062, 1.9728240966796875, 25.59356689453125, 59.34028244018555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000383.npy"} +{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 32.187374114990234, "std": 31.6424560546875, "min": -13.718070983886719, "p10": -0.3848163604736301, "median": 25.140990257263184, "p90": 73.91573715209962, "max": 157.997314453125, "pos_frac": 0.890625, "sample": [18.54424285888672, 26.028648376464844, 44.69647979736328, 3.268360137939453, 10.227455139160156, 11.081146240234375, -13.718070983886719, -6.3627166748046875, 108.43299102783203, 23.595844268798828, 84.11202239990234, 37.15032958984375, 13.96595573425293, 5.48431396484375, 39.01997756958008, 10.315673828125, 19.605148315429688, 10.772771835327148, 58.83030700683594, 25.765869140625, 41.90347671508789, -9.481088638305664, 24.516111373901367, 38.433135986328125, 2.4447898864746094, -8.026580810546875, 6.455619812011719, -3.4098892211914062, -2.651660919189453, 37.040977478027344, 57.007301330566406, 44.271671295166016, 8.063863754272461, 26.924236297607422, 81.5328140258789, 2.2607688903808594, 67.20811462402344, 47.07624053955078, 59.651649475097656, 3.446533203125, 24.05535888671875, 16.318954467773438, 13.790046691894531, 30.583656311035156, 15.8724365234375, 75.46978759765625, 96.46229553222656, 40.20994567871094, 33.844024658203125, 28.15418243408203, 70.28961944580078, 8.748832702636719, 49.542694091796875, 157.997314453125, 49.74736022949219, -1.5186386108398438, 22.966320037841797, 40.582061767578125, 5.452470779418945, 23.592792510986328, 44.4266357421875, 87.12326049804688, 16.067001342773438, 54.72859191894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000384.npy"} +{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 28.625473022460938, "std": 22.074491500854492, "min": -14.95257568359375, "p10": 0.14277954101562523, "median": 29.770654678344727, "p90": 56.98138771057129, "max": 76.28366088867188, "pos_frac": 0.921875, "sample": [-1.24151611328125, 13.163009643554688, 1.5127029418945312, 49.446475982666016, 57.29775619506836, 40.78887939453125, 55.44786071777344, 11.94818115234375, 39.93852996826172, 55.56272888183594, 40.608184814453125, -4.261199951171875, -6.8197021484375, 50.5512580871582, 62.42308044433594, 8.198768615722656, 54.001712799072266, 23.151386260986328, 47.11916732788086, 16.58849334716797, 60.47251510620117, 8.56917953491211, -14.95257568359375, 45.45432662963867, 2.5701980590820312, 52.02168273925781, 39.25608444213867, 24.981971740722656, 15.413238525390625, 46.47840881347656, 27.046218872070312, 11.313140869140625, 0.380584716796875, 56.243194580078125, 71.95955657958984, 39.00798416137695, 0.005603790283203125, 8.923957824707031, 57.624168395996094, 41.35539245605469, 2.5296478271484375, 0.040863037109375, 32.949981689453125, 29.127979278564453, 38.3228759765625, 31.209312438964844, 9.894403457641602, 7.5733642578125, 30.413330078125, 7.487297058105469, 30.905441284179688, 37.918731689453125, 22.456703186035156, -0.6337814331054688, 27.30727767944336, 6.367034912109375, 76.28366088867188, 37.402122497558594, 26.040712356567383, 53.181243896484375, 58.38954162597656, 34.775413513183594, 16.29100799560547, 6.2455291748046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000385.npy"} +{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 34.01884460449219, "std": 24.481178283691406, "min": -21.662757873535156, "p10": 9.051087188720704, "median": 32.14311981201172, "p90": 64.5694076538086, "max": 109.6986083984375, "pos_frac": 0.9375, "sample": [5.101207733154297, 46.56142044067383, 9.244132995605469, 44.93727493286133, 55.8494873046875, 23.40887451171875, 11.332695007324219, 20.35014533996582, 27.734107971191406, 46.780670166015625, 45.525211334228516, 83.87257385253906, -10.942489624023438, 15.894880294799805, 48.09552001953125, 50.9073486328125, 33.094261169433594, 10.700782775878906, 18.046213150024414, -21.662757873535156, 26.467531204223633, 49.58149719238281, 36.24571990966797, 36.01458740234375, 35.68926239013672, 59.26090621948242, 18.705272674560547, 36.85129165649414, 70.87113189697266, 75.65962982177734, 8.968353271484375, 64.840087890625, 58.30778503417969, 24.51671600341797, 27.566818237304688, 68.20719909667969, 109.6986083984375, 31.191978454589844, 50.114471435546875, 13.942087173461914, 36.698394775390625, 47.43935775756836, 20.107955932617188, 34.819698333740234, 22.184295654296875, 12.573844909667969, 11.07025146484375, 56.041236877441406, 24.17987060546875, 56.55279541015625, 9.482528686523438, 8.280158996582031, 12.990985870361328, 27.493648529052734, 20.564958572387695, 36.00090789794922, 63.93782043457031, 42.47550964355469, -4.298698425292969, -8.253448486328125, 73.62967681884766, 28.850881576538086, 63.420387268066406, 13.430427551269531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000386.npy"} +{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 28.748126983642578, "std": 23.2883358001709, "min": -16.17566680908203, "p10": 0.006580543518066681, "median": 25.113113403320312, "p90": 60.574073028564456, "max": 82.43023681640625, "pos_frac": 0.890625, "sample": [26.157997131347656, 31.51342010498047, 30.060222625732422, 11.820655822753906, -6.129703521728516, 71.72737884521484, 25.61431884765625, 0.28357887268066406, 62.94512939453125, 50.81736755371094, -16.17566680908203, 47.159629821777344, 12.459943771362305, 35.73154067993164, 6.6756744384765625, 59.15904998779297, 12.86151123046875, 44.25819396972656, 36.56681823730469, 24.611907958984375, 55.31928253173828, 24.013145446777344, 37.43431854248047, 61.180511474609375, 39.27471160888672, 12.645965576171875, 16.638992309570312, 62.604549407958984, 1.6533889770507812, 23.528484344482422, 8.599540710449219, 66.43089294433594, 82.43023681640625, 16.655704498291016, 75.23129272460938, 10.597671508789062, 42.291175842285156, 22.790081024169922, 13.608451843261719, 39.345523834228516, 23.69780731201172, -0.5467100143432617, 22.864913940429688, 48.92090606689453, 4.4405517578125, 57.398468017578125, -11.768043518066406, -7.0191650390625, 58.49884033203125, -7.701118469238281, 28.47174072265625, 54.153839111328125, 10.803306579589844, 35.52936935424805, -0.11213302612304688, 48.23596954345703, 5.3072509765625, 23.76751708984375, 34.12303161621094, 12.515914916992188, 50.73431396484375, 6.288183212280273, 36.37517547607422, 24.507217407226562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000387.npy"} +{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 35.99916076660156, "std": 31.401548385620117, "min": -12.852188110351562, "p10": 4.186383819580079, "median": 27.861085891723633, "p90": 71.42238769531251, "max": 134.51425170898438, "pos_frac": 0.9375, "sample": [70.57720947265625, 63.785614013671875, 20.70694351196289, 96.7908935546875, 134.51425170898438, 102.41561126708984, 54.51678466796875, -2.3502120971679688, 25.785484313964844, 20.824081420898438, 17.88006591796875, 70.37448120117188, 37.75505065917969, 27.436803817749023, 4.1154327392578125, 15.817440032958984, 26.97265625, 6.097297668457031, 3.8893890380859375, 6.690071105957031, 39.72325134277344, 88.16993713378906, 4.351936340332031, 4.673618316650391, 37.876285552978516, 55.483768463134766, -5.629280090332031, 28.897628784179688, 23.29444122314453, 20.302825927734375, 1.0240097045898438, 24.987152099609375, 30.333847045898438, -10.842605590820312, 28.285367965698242, 24.985855102539062, 26.971786499023438, 18.907936096191406, 104.03892517089844, 7.604253768920898, 22.464385986328125, 8.861377716064453, 37.8189697265625, 32.44715118408203, 21.201736450195312, 52.13664245605469, 62.845924377441406, 48.36084747314453, 6.6973724365234375, 63.80069351196289, 64.94457244873047, 123.14068603515625, -12.852188110351562, 54.773651123046875, 29.800678253173828, 11.103515625, 17.69847869873047, 20.272907257080078, 71.78460693359375, 55.31745910644531, 28.345016479492188, 42.37770080566406, 47.46915817260742, 35.0987548828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000388.npy"} +{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 34.090003967285156, "std": 26.279804229736328, "min": -12.63510513305664, "p10": 3.3240041732788095, "median": 31.090526580810547, "p90": 65.47119293212891, "max": 134.93673706054688, "pos_frac": 0.90625, "sample": [24.974302291870117, 87.78425598144531, 46.88649368286133, -12.63510513305664, -4.147321701049805, 36.92681884765625, 46.676490783691406, 55.44178009033203, 13.707130432128906, 2.9810791015625, 20.425003051757812, -6.5674591064453125, 14.96286392211914, -3.3453140258789062, 16.57529067993164, 25.589561462402344, 10.847209930419922, 41.549713134765625, 62.94945526123047, 20.990760803222656, 23.173309326171875, 16.862075805664062, 80.10240173339844, 19.749462127685547, 20.70226287841797, 61.85906219482422, 55.9940185546875, 29.889434814453125, 43.33290100097656, 31.908987045288086, 49.400596618652344, 24.03289794921875, 38.383575439453125, 48.49738311767578, 13.959724426269531, 17.105667114257812, 48.97749328613281, 15.728652954101562, 47.39872741699219, 36.299713134765625, 69.70367431640625, 10.029386520385742, 34.22777557373047, -1.9586257934570312, 30.597000122070312, 63.34845733642578, 39.749916076660156, 16.08253288269043, 56.583438873291016, 20.598182678222656, 13.655879974365234, 71.62469482421875, 31.58405303955078, -0.10148239135742188, 44.13066482543945, 47.22727966308594, 65.70928955078125, 64.91563415527344, 69.66224670410156, 13.767498016357422, 12.805267333984375, 4.124162673950195, 42.82514190673828, 134.93673706054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000389.npy"} +{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 31.478557586669922, "std": 28.455049514770508, "min": -24.08252716064453, "p10": -1.7472772598266597, "median": 28.54885768890381, "p90": 72.7047821044922, "max": 123.35812377929688, "pos_frac": 0.875, "sample": [29.500225067138672, 24.70431137084961, 86.58805084228516, 91.30525207519531, 6.0529022216796875, -1.961782455444336, 27.931961059570312, 23.517257690429688, 27.98322296142578, 38.77033233642578, 79.49359893798828, 27.643386840820312, 43.195526123046875, 48.49767303466797, -1.9820938110351562, 11.047943115234375, 1.3079986572265625, 123.35812377929688, 21.418563842773438, 72.14692687988281, 44.761322021484375, 19.710723876953125, 72.94386291503906, 29.745559692382812, 5.419061660766602, 32.89905548095703, 13.493036270141602, -11.802066802978516, 47.51658248901367, 24.73682403564453, -5.486427307128906, 24.310821533203125, -1.24676513671875, 25.58330535888672, 51.08293151855469, 35.096099853515625, 60.30558776855469, 86.1220932006836, -22.853984832763672, 41.92426300048828, 58.774253845214844, 30.383209228515625, 6.695274353027344, 30.969432830810547, 32.02301025390625, 32.37370300292969, 41.341697692871094, 22.466217041015625, 75.37435150146484, 47.427528381347656, 28.6802978515625, 23.372451782226562, 10.5699462890625, 33.848182678222656, 28.417417526245117, 1.8104877471923828, 13.242446899414062, -9.562566757202148, -24.08252716064453, 66.58345031738281, 7.682304382324219, 10.888004302978516, 46.827354431152344, 43.74053192138672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000390.npy"} +{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 27.683563232421875, "std": 27.29060935974121, "min": -16.2227783203125, "p10": -0.21825866699218588, "median": 23.451786041259766, "p90": 60.69417800903324, "max": 125.81134033203125, "pos_frac": 0.890625, "sample": [1.4825859069824219, 6.5814056396484375, 21.133079528808594, 12.072113037109375, -5.223175048828125, 27.53266143798828, 14.305440902709961, 32.76696014404297, 29.659564971923828, 43.582275390625, 4.4822540283203125, 51.91064453125, -4.572612762451172, 21.546096801757812, 30.161666870117188, 25.729204177856445, 16.124975204467773, 27.712448120117188, 4.131858825683594, 20.056884765625, 24.93152618408203, 67.89850616455078, 21.9720458984375, 5.701362609863281, -10.89323616027832, 30.42270278930664, 125.81134033203125, 45.647117614746094, 11.847274780273438, 51.321685791015625, 8.647651672363281, 48.580467224121094, 26.238018035888672, 37.29749298095703, -0.880523681640625, 10.287967681884766, 34.698204040527344, 37.28263854980469, 81.8049087524414, 18.693809509277344, -7.0987701416015625, 10.856193542480469, 37.19308853149414, 15.11594009399414, 105.59500122070312, 30.229080200195312, 79.689208984375, 7.51793098449707, -4.0853424072265625, 13.750244140625, 2.226472854614258, 11.093276977539062, -16.2227783203125, 46.92506790161133, 78.40999603271484, 51.30938720703125, 64.45854949951172, 35.56114196777344, 48.73771667480469, 1.3270263671875, 41.71940612792969, 41.783687591552734, 5.4937591552734375, 11.675315856933594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000391.npy"} +{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 34.45188903808594, "std": 26.532011032104492, "min": -10.12594223022461, "p10": 6.840547180175782, "median": 27.127952575683594, "p90": 66.87245788574221, "max": 130.47052001953125, "pos_frac": 0.953125, "sample": [38.99089813232422, 23.944427490234375, 7.635917663574219, 27.053024291992188, 36.333343505859375, 26.052268981933594, 27.202880859375, 12.561431884765625, 27.03125762939453, 49.79285430908203, 116.33683013916016, 11.268203735351562, 53.07954406738281, 6.578897476196289, 11.155303955078125, 15.655073165893555, 92.36026000976562, 61.644866943359375, 19.66864776611328, 19.815326690673828, 26.898880004882812, 43.65287780761719, 39.868309020996094, 30.465787887573242, 16.43408966064453, 33.44105529785156, 56.81768798828125, 76.43353271484375, 69.11285400390625, 51.80640411376953, 43.05424499511719, 31.39984130859375, 57.708961486816406, 14.484615325927734, 49.427032470703125, 22.515106201171875, 6.1472015380859375, 34.81704330444336, 4.250434875488281, 12.01639175415039, 4.238800048828125, 39.119873046875, 24.624832153320312, 23.07239532470703, 70.17625427246094, 56.28825378417969, 40.78662109375, 70.9393081665039, 45.04979705810547, -3.4470386505126953, 7.45106315612793, 43.07415771484375, -10.12594223022461, 23.029983520507812, 36.409873962402344, 17.08589744567871, 26.937599182128906, 12.037113189697266, 130.47052001953125, -0.2286376953125, 17.547435760498047, 15.168182373046875, 51.74949645996094, 58.55157470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000392.npy"} +{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 27.648277282714844, "std": 27.87200164794922, "min": -13.310443878173828, "p10": 0.9448600769042972, "median": 19.51966094970703, "p90": 71.16518859863282, "max": 115.21476745605469, "pos_frac": 0.921875, "sample": [7.259006500244141, 58.984771728515625, 29.179630279541016, 16.692026138305664, 2.0266876220703125, 81.9947738647461, 2.9015541076660156, 75.67585754394531, 37.888946533203125, 65.33085632324219, 24.8182373046875, 32.10108947753906, 10.10993766784668, 71.83173370361328, 6.9073486328125, 12.345787048339844, 96.113525390625, 66.470703125, 28.484649658203125, -0.3929290771484375, 9.898508071899414, 0.04638671875, 5.413732528686523, 16.91130828857422, 12.592288970947266, 40.0961799621582, 1.241363525390625, 34.56043243408203, 25.352935791015625, 12.423126220703125, 31.010669708251953, 35.60798645019531, 45.30206298828125, 85.67706298828125, 9.585289001464844, 3.1200904846191406, 37.890106201171875, 18.04883575439453, 69.60991668701172, 20.184646606445312, -6.9796905517578125, 34.83650207519531, 39.22959899902344, 6.788261413574219, 35.589115142822266, 45.97320556640625, 72.57349395751953, 4.4692840576171875, 18.85467529296875, 0.8177871704101562, 24.787437438964844, 10.899200439453125, 1.3781871795654297, -13.310443878173828, 6.763629913330078, 14.892433166503906, 32.48468017578125, 4.821067810058594, 45.00506591796875, -3.5635108947753906, -12.619932174682617, 40.626007080078125, 115.21476745605469, 8.66189193725586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000393.npy"} +{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 30.533206939697266, "std": 31.193511962890625, "min": -30.48400115966797, "p10": -3.204917144775388, "median": 20.7296199798584, "p90": 81.40430908203125, "max": 112.64308166503906, "pos_frac": 0.875, "sample": [33.24875259399414, 14.851737976074219, -30.48400115966797, 41.5777473449707, 53.97687530517578, 15.45269775390625, 41.733768463134766, 2.985614776611328, 81.11053466796875, -0.819244384765625, 77.95503234863281, -6.1803436279296875, 1.536529541015625, 20.01214599609375, -5.731040954589844, 23.72101593017578, -5.3698272705078125, 103.06338500976562, 60.391204833984375, 12.059555053710938, 46.61651611328125, 19.634300231933594, 37.597320556640625, 26.273269653320312, 16.21890640258789, 32.332191467285156, 87.494384765625, 12.265945434570312, -9.556251525878906, 84.96044921875, 57.2635498046875, 40.95445251464844, 21.149532318115234, 3.419973373413086, 13.661859512329102, 20.309707641601562, 36.990135192871094, 19.700515747070312, 9.7872314453125, 81.53021240234375, 5.1641387939453125, 38.796875, 10.55862045288086, 38.81085968017578, -16.34601593017578, 17.045166015625, 109.25804901123047, 43.48942565917969, 16.59737777709961, 14.18198013305664, 40.47022247314453, -4.227348327636719, 112.64308166503906, 88.73445129394531, 13.116943359375, 25.786270141601562, 8.118362426757812, 21.511127471923828, 15.484840393066406, 61.49372863769531, 16.46540069580078, 2.004117965698242, 32.848480224609375, 48.422698974609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000394.npy"} +{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 31.938392639160156, "std": 26.75515365600586, "min": -5.456277847290039, "p10": 1.9816175460815435, "median": 26.770893096923828, "p90": 65.15422515869142, "max": 105.67317199707031, "pos_frac": 0.921875, "sample": [5.057159423828125, 32.363094329833984, 2.6153717041015625, 5.234613418579102, 5.254478454589844, 61.56513214111328, 74.54476928710938, 48.989830017089844, -5.456277847290039, 23.863468170166016, 4.7925567626953125, 54.719120025634766, 28.302169799804688, 23.999652862548828, 5.547584533691406, 35.59504318237305, 66.73628234863281, 46.005165100097656, 41.350067138671875, 1.7100086212158203, 51.87117004394531, -1.3290863037109375, 69.97492980957031, 38.83906555175781, 4.672950744628906, 31.825302124023438, 37.21388244628906, 24.715259552001953, 1.071624755859375, 4.096076965332031, 105.67317199707031, 81.48775482177734, 38.44914245605469, 11.080286026000977, 45.89094543457031, 4.803985595703125, 25.23961639404297, 21.959182739257812, 66.19966125488281, 57.04150390625, 62.714874267578125, 46.86894226074219, -0.01117706298828125, 4.9232330322265625, 20.19454002380371, 56.78582000732422, 3.6697845458984375, 94.96143341064453, 20.243745803833008, -5.237037658691406, 10.017864227294922, -1.7574310302734375, 5.717535018920898, 56.24505615234375, 60.38393783569336, 57.699520111083984, 18.79480743408203, 41.039031982421875, 52.64457702636719, 60.47451400756836, 49.42055130004883, 6.786712646484375, 18.931419372558594, 18.97903823852539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000395.npy"} +{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 31.344343185424805, "std": 23.985855102539062, "min": -12.659290313720703, "p10": -0.44924507141113157, "median": 28.423534393310547, "p90": 59.269508361816406, "max": 110.92547607421875, "pos_frac": 0.890625, "sample": [58.007293701171875, 33.36549377441406, 2.8144378662109375, 59.30681610107422, 32.18818664550781, 82.26206970214844, 19.021926879882812, 22.465164184570312, 25.01856231689453, 46.67694091796875, 49.324607849121094, 14.125465393066406, 42.83954620361328, 59.182456970214844, -4.385005950927734, 36.356056213378906, 27.13664436340332, 41.59953308105469, 27.625732421875, 31.10846710205078, 50.555992126464844, 50.342002868652344, 110.92547607421875, 23.425575256347656, 29.029769897460938, 18.36138916015625, 29.656234741210938, 47.97447204589844, 21.762924194335938, 35.604408264160156, -0.9662513732910156, 23.618045806884766, -9.464309692382812, 24.5382080078125, 19.351055145263672, 68.35064697265625, 36.2947998046875, 5.095085144042969, 37.45411682128906, 56.613059997558594, 18.282760620117188, 15.381683349609375, 5.1880340576171875, 27.039920806884766, 17.636123657226562, -4.528621673583984, 59.349395751953125, 46.35551452636719, 12.260225296020508, 40.50410461425781, 82.51593017578125, 28.54167938232422, 26.524261474609375, -11.441106796264648, 48.137939453125, 25.167707443237305, 31.171463012695312, 0.7571029663085938, -12.659290313720703, -5.5209503173828125, 45.23287582397461, 72.57354736328125, 24.699188232421875, 28.305389404296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000396.npy"} +{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 35.8455810546875, "std": 27.562362670898438, "min": -10.436546325683594, "p10": 3.3675262451171903, "median": 30.555654525756836, "p90": 70.97510986328125, "max": 117.25877380371094, "pos_frac": 0.90625, "sample": [17.10926055908203, 64.15936279296875, 11.571968078613281, 65.3193359375, 28.346580505371094, 25.697860717773438, 8.3441162109375, 27.87256622314453, 72.28572082519531, 20.350265502929688, 53.2572021484375, 23.852188110351562, 29.7000732421875, 13.082969665527344, 15.784698486328125, 33.605194091796875, 12.986686706542969, 22.737838745117188, 47.811126708984375, 21.800399780273438, 31.087078094482422, 87.97526550292969, 41.88496398925781, 45.50466537475586, 70.22428894042969, 6.281654357910156, 64.57192993164062, 67.72454071044922, -5.23480224609375, 80.54630279541016, 68.88040161132812, 15.127079010009766, 7.082099914550781, 83.87628173828125, 54.604862213134766, 24.325828552246094, 23.051219940185547, -6.571754455566406, 34.192832946777344, 47.986595153808594, 9.293441772460938, -5.213306427001953, 17.213359832763672, 2.1186141967773438, 71.29689025878906, 30.02423095703125, -10.436546325683594, 44.66328430175781, 33.167816162109375, 47.10126495361328, -8.799564361572266, 60.970489501953125, 53.60975646972656, 117.25877380371094, 9.941307067871094, 78.5238037109375, 34.99590301513672, -1.9806938171386719, 51.68278884887695, 32.118221282958984, 19.153846740722656, 63.675437927246094, 21.33175277709961, 63.60957336425781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000397.npy"} +{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 31.041210174560547, "std": 26.031597137451172, "min": -10.649391174316406, "p10": 5.640168571472168, "median": 25.984329223632812, "p90": 58.289406585693364, "max": 129.79881286621094, "pos_frac": 0.9375, "sample": [16.49584197998047, 13.253646850585938, 58.973541259765625, 30.624221801757812, 37.02827072143555, 33.44788360595703, 29.31238555908203, 9.322998046875, 14.471969604492188, 2.8225173950195312, 46.02305603027344, 5.498748779296875, 52.16575622558594, 30.161258697509766, 44.72905731201172, 129.79881286621094, 27.76569366455078, 34.41460418701172, 76.85073852539062, 45.922523498535156, 42.523834228515625, 25.75328254699707, 35.68632507324219, 25.97020721435547, 59.0455322265625, 21.6087646484375, 25.029205322265625, 12.455947875976562, 17.728836059570312, 21.4195556640625, 31.502769470214844, 55.53115463256836, 22.564773559570312, 5.365673065185547, -2.270355224609375, 9.16986083984375, 25.998451232910156, 11.440032958984375, 52.309776306152344, 12.158302307128906, 38.984867095947266, 44.92149353027344, 34.49391174316406, -2.9422607421875, 27.611602783203125, 10.136444091796875, 24.307262420654297, 14.821990966796875, 18.292381286621094, 42.33453369140625, 5.970148086547852, 62.820831298828125, 56.693092346191406, 117.31784057617188, -10.649391174316406, 43.42835998535156, 97.27047729492188, 23.84716033935547, 18.15673828125, 15.828731536865234, 28.164443969726562, 22.130035400390625, 11.447196960449219, -8.825965881347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000398.npy"} +{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 34.47357940673828, "std": 25.490554809570312, "min": -16.744308471679688, "p10": 1.2411916732788086, "median": 34.38950729370117, "p90": 66.3838592529297, "max": 87.28109741210938, "pos_frac": 0.90625, "sample": [45.013458251953125, 26.70099639892578, 14.469768524169922, 41.3668212890625, 45.0091667175293, -11.990373611450195, 64.82896423339844, 35.58230209350586, 31.715553283691406, 27.80877685546875, 34.18016815185547, 14.773029327392578, 42.212013244628906, -5.31158447265625, 51.83740997314453, 16.451709747314453, 57.37797546386719, 9.347766876220703, 24.530977249145508, 15.763286590576172, 8.886627197265625, 11.771244049072266, 47.39381408691406, 25.92177391052246, 62.971099853515625, 76.5767822265625, 34.32634735107422, 51.03309631347656, -16.744308471679688, 33.41638946533203, 44.9998779296875, 84.13427734375, 53.804443359375, 23.34481430053711, 46.028228759765625, -12.916267395019531, 87.28109741210938, 49.651092529296875, 64.1455307006836, 39.22663879394531, 14.159423828125, 63.207916259765625, 47.04554748535156, -13.745574951171875, 11.933422088623047, 71.48591613769531, 1.3643360137939453, 36.405670166015625, 68.1525650024414, 5.495647430419922, 32.80238342285156, 57.2689208984375, 34.452667236328125, 55.61100769042969, 13.666271209716797, 66.6949462890625, 17.3411865234375, 60.52854537963867, 1.18841552734375, 67.57290649414062, 65.65798950195312, -0.3248138427734375, 18.249805450439453, 13.17314338684082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000399.npy"} +{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 38.233909606933594, "std": 31.20196533203125, "min": -27.43792724609375, "p10": 0.9593193054199235, "median": 34.80133819580078, "p90": 74.66145629882813, "max": 136.8827667236328, "pos_frac": 0.90625, "sample": [38.01641845703125, 47.834083557128906, 28.656784057617188, 40.635467529296875, 66.1990966796875, 33.87419128417969, 15.921417236328125, 54.39481735229492, 14.084983825683594, 20.65848159790039, 52.71154022216797, 19.77028465270996, 34.796478271484375, 34.80619812011719, -23.402572631835938, 39.434051513671875, 71.14082336425781, 9.929605484008789, 24.897506713867188, 44.541011810302734, 99.33673858642578, 65.17190551757812, 44.69496154785156, 75.50550842285156, 3.7217254638671875, 47.58122253417969, 40.40941619873047, 56.199974060058594, 15.777786254882812, 79.03117370605469, -1.7084217071533203, -9.100242614746094, -9.846611022949219, 21.740028381347656, 34.62995910644531, 22.602920532226562, 10.664405822753906, 85.48161315917969, 27.501129150390625, 43.33868408203125, 0.25733184814453125, 19.342578887939453, 37.865116119384766, 108.80321502685547, -27.43792724609375, 70.94215393066406, 28.961082458496094, 48.371681213378906, 31.381732940673828, 136.8827667236328, 30.4029541015625, 6.9068145751953125, 23.55738067626953, 52.67003631591797, 27.416725158691406, 72.69200134277344, -1.7286834716796875, 61.16206359863281, 62.56036376953125, 92.57330322265625, 2.5972900390625, 62.354461669921875, 63.849761962890625, 12.951560974121094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000400.npy"} +{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 29.519914627075195, "std": 31.654132843017578, "min": -32.24431610107422, "p10": -6.396365547180175, "median": 23.015088081359863, "p90": 82.00961303710938, "max": 126.46450805664062, "pos_frac": 0.84375, "sample": [-4.726291656494141, 25.981292724609375, 15.584705352783203, 35.40656280517578, 35.59173583984375, 23.21970558166504, -9.479421615600586, -32.24431610107422, -10.103378295898438, -6.587162017822266, 8.203849792480469, -2.874908447265625, 21.03644561767578, 12.690423965454102, 8.4990234375, 3.258268356323242, 45.04087829589844, 56.36077880859375, 4.5262298583984375, 43.16412353515625, 0.101043701171875, -7.475456237792969, 42.346839904785156, 51.01470947265625, 14.330007553100586, 83.33003234863281, 68.90257263183594, 46.42626953125, 126.46450805664062, 16.574337005615234, 25.868698120117188, 13.840240478515625, 12.416725158691406, 24.84930419921875, -7.573551177978516, 8.409561157226562, 88.48533630371094, 3.269115447998047, 52.08032989501953, 37.21656799316406, 48.840911865234375, 57.009925842285156, 24.002273559570312, 3.45233154296875, 22.810470581054688, 18.550636291503906, 29.541854858398438, 20.69280242919922, 82.48641967773438, 18.285385131835938, 80.89706420898438, 40.6295166015625, 39.08957290649414, 23.28365135192871, 89.86065673828125, 90.23876953125, 8.763837814331055, 19.410554885864258, 51.887733459472656, -5.951173782348633, 62.35699462890625, 94.34915924072266, 10.123222351074219, -14.763811111450195], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000401.npy"} +{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 38.97754669189453, "std": 23.835172653198242, "min": -10.785377502441406, "p10": 11.025777435302738, "median": 37.254255294799805, "p90": 70.41006546020509, "max": 104.41378021240234, "pos_frac": 0.96875, "sample": [57.93374252319336, 17.719785690307617, 9.978828430175781, 104.41378021240234, 29.621353149414062, 52.95418930053711, 7.3269195556640625, 13.959846496582031, 16.953630447387695, 25.987228393554688, 72.03309631347656, 68.4177474975586, 91.15001678466797, 22.096385955810547, 16.672698974609375, 42.485809326171875, 6.867546081542969, 42.906227111816406, 37.62982177734375, 42.163352966308594, 54.38030242919922, 7.2485809326171875, 62.258419036865234, -1.6487808227539062, 40.46867370605469, 61.302452087402344, 26.555023193359375, 95.6226806640625, 19.99273681640625, 20.803115844726562, 42.59588623046875, 22.82845687866211, 26.997509002685547, 26.531841278076172, 35.80378723144531, 32.262413024902344, 58.952171325683594, 46.51579284667969, 37.26004409790039, 84.92247772216797, 13.468658447265625, 6.218149185180664, 53.777610778808594, 74.16645812988281, 28.94955062866211, 22.88601303100586, 45.937835693359375, 49.14250946044922, 25.527034759521484, 34.515228271484375, 45.86112976074219, 71.263916015625, 44.22344207763672, 63.18605041503906, 17.367095947265625, 49.0133056640625, 50.409908294677734, 49.3880615234375, 37.24846649169922, -10.785377502441406, 26.933067321777344, 15.417465209960938, 37.05439758300781, 62.467498779296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000402.npy"} +{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 30.17224884033203, "std": 28.46843719482422, "min": -40.982303619384766, "p10": -2.803588485717772, "median": 25.07079792022705, "p90": 72.03259353637695, "max": 105.57167053222656, "pos_frac": 0.859375, "sample": [21.57964324951172, 31.637351989746094, 7.486745834350586, 43.45537567138672, 29.680511474609375, 1.3486766815185547, 30.462020874023438, 18.53276824951172, 3.294342041015625, 8.489641189575195, 21.0322265625, -1.1534423828125, -4.401519775390625, 63.063899993896484, 1.2104034423828125, 20.717750549316406, 22.21518325805664, 13.527055740356445, 87.17294311523438, 54.933868408203125, 38.07402038574219, 19.532936096191406, 72.06295776367188, 7.002616882324219, -10.056922912597656, 89.31622314453125, 63.195587158203125, 23.075271606445312, 33.32809829711914, 72.02734375, -3.405590057373047, 58.168212890625, 105.57167053222656, 30.47555160522461, 24.517738342285156, 48.72211456298828, -4.73210334777832, 36.643768310546875, 73.96984100341797, 72.03484344482422, -40.982303619384766, 25.58545684814453, 50.274871826171875, -1.3989181518554688, 67.24854278564453, 9.894105911254883, 91.16848754882812, 35.362274169921875, 19.760597229003906, 34.20890808105469, 56.894107818603516, 24.55613899230957, 32.10502624511719, -8.179634094238281, 23.891515731811523, 17.040985107421875, 8.723979949951172, 30.50506591796875, 17.635360717773438, 40.607364654541016, 35.580162048339844, -9.906288146972656, 18.34564208984375, 28.292760848999023], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000403.npy"} +{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 30.536544799804688, "std": 28.906705856323242, "min": -27.0700626373291, "p10": -2.543499755859374, "median": 25.928820610046387, "p90": 70.06002960205079, "max": 102.101806640625, "pos_frac": 0.859375, "sample": [21.788414001464844, 18.977840423583984, 26.48411750793457, 102.101806640625, 19.981544494628906, 38.75306701660156, 16.930198669433594, 67.877197265625, 22.318683624267578, 88.39945983886719, 60.05457305908203, 10.946304321289062, 10.045459747314453, 65.16041564941406, 12.537368774414062, 6.06512451171875, 70.99552917480469, 39.63645935058594, -1.1295795440673828, 22.484601974487305, -20.058063507080078, 18.194854736328125, 35.652000427246094, 53.592071533203125, 53.49562072753906, 44.78600311279297, -27.0700626373291, 45.71161651611328, 40.936607360839844, 7.2691802978515625, -4.077791213989258, 7.043928146362305, 67.58833312988281, 46.86372375488281, 29.672149658203125, 0.8863849639892578, -7.06060791015625, 36.78407287597656, 60.82307434082031, 9.912099838256836, -1.7175369262695312, 24.907989501953125, 34.58340835571289, 79.55987548828125, 3.2912025451660156, -2.924560546875, 1.308675765991211, 25.373523712158203, 35.2562255859375, -3.9129257202148438, -2.8974838256835938, 36.08814239501953, 27.679683685302734, 11.874168395996094, 64.60140228271484, 90.15431213378906, 39.265235900878906, 9.829269409179688, 7.216840744018555, 2.582571029663086, 87.93243408203125, 42.549842834472656, 45.731201171875, 74.6517105102539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000404.npy"} +{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 30.217140197753906, "std": 29.265989303588867, "min": -16.990333557128906, "p10": -1.7223726272583004, "median": 22.36363124847412, "p90": 71.99656982421875, "max": 116.83235168457031, "pos_frac": 0.875, "sample": [41.95573425292969, 65.00527954101562, 40.133384704589844, 22.71546745300293, 15.293357849121094, 8.6033935546875, -11.379104614257812, 13.5943603515625, 11.804647445678711, -2.67254638671875, 29.935422897338867, 51.993743896484375, 104.1762924194336, -16.990333557128906, 11.934484481811523, 79.88191223144531, 2.877836227416992, 17.306438446044922, 45.197044372558594, 0.8264923095703125, 36.199378967285156, 26.565536499023438, 17.20263671875, 50.640655517578125, 13.562417984008789, 23.79791831970215, 42.143287658691406, 18.266128540039062, 4.012054443359375, -4.369951248168945, 55.41984939575195, -1.8853130340576172, 17.69534683227539, 22.078290939331055, 50.686767578125, 14.851739883422852, 20.361495971679688, -1.3421783447265625, 2.243732452392578, 10.867412567138672, 71.8233642578125, 72.07080078125, 80.56375122070312, 47.404624938964844, 28.081165313720703, 45.36992645263672, 76.46755981445312, 14.936241149902344, 11.412841796875, 29.756378173828125, 44.806724548339844, 9.528907775878906, -13.310178756713867, 84.0216064453125, 12.135169982910156, 4.27252197265625, 43.52215576171875, 13.58917236328125, 56.34941864013672, 116.83235168457031, 54.23454284667969, -12.637351989746094, 22.648971557617188, 68.82582092285156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000405.npy"} +{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 31.772830963134766, "std": 28.3826961517334, "min": -30.908905029296875, "p10": 0.22066898345947494, "median": 28.54346466064453, "p90": 71.41479873657227, "max": 109.26414489746094, "pos_frac": 0.890625, "sample": [27.261062622070312, 9.227230072021484, 14.169792175292969, 46.37713623046875, 53.458343505859375, 13.674644470214844, 24.342018127441406, 14.660221099853516, 109.26414489746094, 28.652748107910156, 60.2286491394043, -0.7277069091796875, 86.63409423828125, 34.9232177734375, 44.52937316894531, 23.57010269165039, 35.179283142089844, 67.44679260253906, 53.38470458984375, -2.473630905151367, 59.199501037597656, 56.338768005371094, 43.766571044921875, -30.908905029296875, 36.44390869140625, 15.276260375976562, 12.97572135925293, -4.370006561279297, 40.222869873046875, -6.263710021972656, 5.918800354003906, 38.999759674072266, 22.969329833984375, -29.33819580078125, 3.8986854553222656, 72.42234802246094, 60.59974670410156, 14.263656616210938, 26.64250373840332, -1.9271278381347656, 32.458953857421875, 50.69207763671875, 81.67809295654297, 7.931388854980469, 15.255508422851562, 10.209770202636719, 72.0174560546875, 49.09931182861328, 8.403656005859375, 12.4459228515625, 70.00859832763672, 79.70359802246094, 15.021980285644531, 6.37274169921875, 2.857421875, 2.4335460662841797, 28.434181213378906, 30.416091918945312, 75.34587097167969, 50.523101806640625, 64.32339477539062, 43.24835205078125, 29.391643524169922, 14.275829315185547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000406.npy"} +{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 32.281612396240234, "std": 29.783952713012695, "min": -30.256179809570312, "p10": -5.100197982788085, "median": 28.05343246459961, "p90": 72.55829315185548, "max": 95.6281509399414, "pos_frac": 0.859375, "sample": [12.659818649291992, 71.18534851074219, 10.070938110351562, -10.356599807739258, -27.944355010986328, 28.154502868652344, 43.900474548339844, -4.030189514160156, 53.66778564453125, -5.558773040771484, 59.191802978515625, 30.000396728515625, 47.36054229736328, 81.07332611083984, 85.27397155761719, 2.601896286010742, 44.93686294555664, 58.18592071533203, 24.937705993652344, 22.60515594482422, 27.952362060546875, -5.8556976318359375, 21.769577026367188, 73.14669799804688, 66.34808349609375, 20.01580810546875, 61.763668060302734, 11.916072845458984, 61.93296813964844, 41.529258728027344, 5.499641418457031, 74.45133972167969, 21.879379272460938, 14.452491760253906, 17.747413635253906, 27.374038696289062, 36.265228271484375, 93.19363403320312, 38.511287689208984, -30.256179809570312, 37.44026184082031, 35.14104080200195, 9.509765625, 40.24512481689453, 55.94270324707031, 21.429367065429688, 7.839935302734375, 54.35418701171875, 36.14753723144531, 16.279495239257812, 95.6281509399414, -8.033729553222656, 10.43936538696289, 67.4822998046875, -0.4025421142578125, 6.97429084777832, 1.746572494506836, 65.51136016845703, 3.7097244262695312, 20.597198486328125, 82.79144287109375, 54.30426025390625, -9.627349853515625, 53.01921081542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000407.npy"} +{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 27.853038787841797, "std": 27.439891815185547, "min": -19.864295959472656, "p10": -3.3349412918090815, "median": 26.381354331970215, "p90": 52.96138687133789, "max": 113.87642669677734, "pos_frac": 0.84375, "sample": [11.825660705566406, 74.95079040527344, 87.21572875976562, 23.88011932373047, 22.166885375976562, 47.14399719238281, 6.037200927734375, 29.608671188354492, 34.83769989013672, -9.978996276855469, 22.70207405090332, 7.6414337158203125, -10.475929260253906, 31.80609130859375, -19.864295959472656, -1.24774169921875, 38.29017639160156, 42.99058532714844, 23.304122924804688, 28.88258934020996, 23.450115203857422, 41.53047180175781, 52.90687561035156, 52.98474884033203, -11.006210327148438, 12.811775207519531, -3.6280879974365234, 6.3918914794921875, 21.81446075439453, 53.979156494140625, -16.37364387512207, 4.980255126953125, 49.303375244140625, 4.578550338745117, -10.779857635498047, 5.0823974609375, 35.96959686279297, 39.91542053222656, 20.596694946289062, 45.36190414428711, 49.1304931640625, 14.983306884765625, 34.10919189453125, 48.4937744140625, 36.27521514892578, 1.4481048583984375, 50.69633483886719, 11.107961654663086, -2.6509323120117188, 8.88690185546875, 4.259389877319336, 93.51812744140625, -0.22498321533203125, 5.010551452636719, 52.80842590332031, 40.527618408203125, 7.748847961425781, 31.75464630126953, 52.29515075683594, 43.96965026855469, 113.87642669677734, 83.56947326660156, 41.017242431640625, 38.396827697753906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000408.npy"} +{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 30.92287254333496, "std": 25.655141830444336, "min": -12.047889709472656, "p10": 1.1068693161010745, "median": 25.957639694213867, "p90": 69.23589859008788, "max": 100.93586730957031, "pos_frac": 0.921875, "sample": [-0.903961181640625, -12.047889709472656, 57.195350646972656, 25.089075088500977, 46.191123962402344, 54.953460693359375, 24.2464656829834, 40.9683837890625, 25.070541381835938, 27.765762329101562, 68.97527313232422, 32.566795349121094, 0.9565505981445312, 43.71421813964844, 76.88237762451172, 11.186752319335938, 11.236373901367188, 24.429866790771484, 34.181514739990234, 67.75131225585938, 27.672470092773438, -9.172370910644531, 73.50482940673828, 5.661174774169922, 69.34759521484375, 3.1330108642578125, 15.289093017578125, 16.049083709716797, 9.512672424316406, 31.546512603759766, 17.560195922851562, 84.27442169189453, 50.68901062011719, 17.100929260253906, 19.653711318969727, 83.83563232421875, -5.219871520996094, 32.5244026184082, -2.9642257690429688, 14.039405822753906, 28.219581604003906, 4.394523620605469, 38.1617431640625, 10.481632232666016, 27.26221466064453, 6.636451721191406, 26.826204299926758, 35.004371643066406, 65.25865936279297, 36.70028305053711, 14.624746322631836, 35.035804748535156, 1.4576129913330078, 19.585651397705078, 54.16223907470703, 80.9187240600586, 50.11239242553711, 46.063201904296875, 22.254989624023438, 17.496017456054688, 100.93586730957031, 0.6579742431640625, 21.92791748046875, 20.43799591064453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000409.npy"} +{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 27.859020233154297, "std": 26.466480255126953, "min": -19.219940185546875, "p10": -4.101694488525388, "median": 27.212435722351074, "p90": 69.64659042358399, "max": 89.92950439453125, "pos_frac": 0.84375, "sample": [55.063201904296875, 20.1373291015625, -1.9379653930664062, 1.4694442749023438, 39.11326599121094, 32.23569869995117, 37.481689453125, 70.62752532958984, 42.4979362487793, 6.023384094238281, 15.98097038269043, 17.43511199951172, 9.1295166015625, 53.370208740234375, 2.0526065826416016, 7.958049774169922, 76.80140686035156, -10.993186950683594, -5.639339447021484, 58.027427673339844, 62.38029098510742, 89.92950439453125, 24.244518280029297, 34.52088928222656, 43.302833557128906, 33.24205017089844, -5.516315460205078, -13.517887115478516, 67.35774230957031, 79.80035400390625, 17.435806274414062, -0.7150363922119141, 34.522361755371094, 11.298171997070312, 39.577239990234375, 8.688583374023438, -11.11085319519043, -0.6930675506591797, 7.893524169921875, 6.706695556640625, 5.8717193603515625, 38.695552825927734, 30.84099578857422, 72.77510070800781, 28.742942810058594, 4.4981536865234375, 40.48541259765625, 9.972763061523438, 76.81851196289062, 40.78105163574219, 35.33000183105469, 4.649892807006836, 28.83087158203125, 41.83720397949219, 36.95587921142578, 61.111053466796875, 25.681928634643555, 44.0796012878418, -19.219940185546875, -5.0290069580078125, 24.60045623779297, 11.505407333374023, 71.11582946777344, 15.864158630371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000410.npy"} +{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 29.69510841369629, "std": 28.803665161132812, "min": -16.01111602783203, "p10": -2.8065376281738277, "median": 25.502796173095703, "p90": 65.8371192932129, "max": 123.35232543945312, "pos_frac": 0.859375, "sample": [34.397151947021484, 25.111141204833984, 19.200714111328125, -11.102783203125, -2.194000244140625, 5.282562255859375, 2.3358726501464844, 28.82728385925293, 25.327667236328125, 40.69416809082031, 23.853790283203125, 14.22700309753418, 66.6142807006836, 5.341320037841797, 12.238122940063477, 21.902772903442383, 11.11216926574707, 73.4620361328125, 25.83400535583496, 60.79212188720703, 106.57960510253906, 54.47132110595703, -3.0690536499023438, -5.2169189453125, -16.01111602783203, 33.07087707519531, 42.86039733886719, -5.7153472900390625, 20.363327026367188, 81.743408203125, 8.99505615234375, 53.28717803955078, 35.79425048828125, 15.392841339111328, 48.64830017089844, -2.093313217163086, 17.18950653076172, 64.02374267578125, 48.37845993041992, 28.439315795898438, 17.959524154663086, 48.02188491821289, 25.67792510986328, 53.018653869628906, 34.098289489746094, 31.908645629882812, 27.56789779663086, 4.79241943359375, 123.35232543945312, 47.010902404785156, 9.954387664794922, 75.68875885009766, 51.23461151123047, 93.00119018554688, 47.92247772216797, -13.521247863769531, 0.685150146484375, 27.76214599609375, 23.218116760253906, 7.778871536254883, 40.18870544433594, 2.30255126953125, 14.220108032226562, -3.746593475341797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000411.npy"} +{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 29.459095001220703, "std": 27.281654357910156, "min": -6.75126838684082, "p10": -2.548849487304687, "median": 24.636157989501953, "p90": 71.17525711059572, "max": 100.99809265136719, "pos_frac": 0.859375, "sample": [-3.1637821197509766, 57.384090423583984, 49.85974884033203, 32.38105773925781, 76.92167663574219, 41.22051239013672, 51.488189697265625, 24.285865783691406, 13.1798095703125, 84.98214721679688, 4.601863861083984, 7.4692535400390625, 15.553081512451172, 38.06389617919922, 5.508459091186523, 38.182037353515625, 34.78936004638672, -4.467350006103516, 17.33245849609375, 25.11773681640625, 35.439483642578125, 10.436973571777344, 34.432579040527344, 25.407377243041992, 15.301544189453125, 5.992885589599609, 1.6470184326171875, 21.615238189697266, -4.1508636474609375, 47.400657653808594, 8.745723724365234, 10.290050506591797, 31.819374084472656, 9.008804321289062, 41.824241638183594, 12.031820297241211, -2.0536155700683594, 16.421512603759766, 10.450614929199219, 78.6925048828125, 45.2041015625, -3.5255661010742188, 86.07713317871094, 41.16911315917969, 62.12083435058594, 22.521167755126953, 6.0428924560546875, 62.729034423828125, -2.2454833984375, 60.64353942871094, 64.82088470458984, 1.4386882781982422, 5.474090576171875, 12.333221435546875, 73.8985595703125, 100.99809265136719, -5.748750686645508, 50.487030029296875, 82.68061828613281, -6.75126838684082, 24.9864501953125, -2.678863525390625, 47.633331298828125, 33.62928771972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000412.npy"} +{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 29.902385711669922, "std": 25.80487823486328, "min": -32.733367919921875, "p10": 3.6720817565917985, "median": 25.96346092224121, "p90": 59.09532852172852, "max": 104.16268920898438, "pos_frac": 0.921875, "sample": [14.962799072265625, 0.8230705261230469, 24.303251266479492, 11.51988410949707, 54.80276870727539, -14.410873413085938, 2.9112625122070312, 26.736412048339844, 17.003868103027344, 17.393417358398438, 32.84463882446289, 59.16199493408203, 18.014686584472656, 55.72997283935547, 10.500396728515625, 17.594533920288086, 25.48284149169922, 6.255039215087891, 7.674041748046875, 104.16268920898438, 6.3448486328125, 58.93977355957031, 31.586944580078125, -32.733367919921875, 5.44732666015625, 38.94769287109375, 17.458953857421875, 16.071884155273438, 24.923721313476562, 78.96601867675781, 55.76893615722656, 24.56754493713379, 46.8780517578125, 39.3824462890625, 11.14422607421875, 78.67066955566406, 62.338104248046875, -5.71624755859375, 30.14366912841797, 88.08782958984375, 40.51762390136719, 37.27278137207031, 49.48797607421875, 38.49656677246094, 27.391525268554688, 54.16227722167969, 54.56922912597656, -7.9629974365234375, -7.1189727783203125, 29.035919189453125, 26.444080352783203, 37.25334167480469, 55.36723327636719, 12.580436706542969, 5.50651741027832, 31.76530647277832, 11.381637573242188, 83.92681884765625, 19.121479034423828, 15.139053344726562, 38.76109313964844, 48.793277740478516, 20.970977783203125, 20.203838348388672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000413.npy"} +{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 35.684043884277344, "std": 27.852243423461914, "min": -11.586601257324219, "p10": 3.590279960632325, "median": 30.399629592895508, "p90": 73.42118606567384, "max": 112.14984130859375, "pos_frac": 0.9375, "sample": [53.09192657470703, 37.421791076660156, 44.110660552978516, 6.309566497802734, 16.956195831298828, 38.41847229003906, 3.3405532836914062, -10.854873657226562, 12.808975219726562, 33.431060791015625, 25.993820190429688, 57.285675048828125, 72.62830352783203, 73.01123809814453, 19.003021240234375, 13.415506362915039, 36.502525329589844, 21.669448852539062, 4.172975540161133, 11.363779067993164, 30.147991180419922, 39.80980682373047, 17.901187896728516, 29.660953521728516, 28.53173828125, 52.59858703613281, 75.86659240722656, 19.885297775268555, 47.38859558105469, 55.90234375, 82.40850830078125, 41.924217224121094, 7.284400939941406, 21.51592254638672, 89.61898040771484, 13.16853141784668, 26.469833374023438, -11.586601257324219, -10.493759155273438, 18.186595916748047, 13.001358032226562, 54.119232177734375, 64.75568389892578, -3.0142574310302734, 2.0490036010742188, 112.14984130859375, 26.179229736328125, 64.02210998535156, 9.114620208740234, 40.19041061401367, 37.964752197265625, 57.538238525390625, 30.651268005371094, 40.55023193359375, 20.181659698486328, 40.6112060546875, 81.57476806640625, 2.3740997314453125, 61.0936279296875, 73.59687805175781, 107.20256042480469, 15.246021270751953, 29.96131134033203, 56.39459228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000414.npy"} +{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 29.6812686920166, "std": 24.819744110107422, "min": -5.43707275390625, "p10": 2.4643175125122077, "median": 24.780954360961914, "p90": 58.76277389526368, "max": 114.40155029296875, "pos_frac": 0.9375, "sample": [61.66258239746094, 10.4290771484375, 54.750701904296875, 86.66181945800781, 6.07513427734375, 40.06243133544922, 34.31285095214844, 21.001361846923828, 26.749309539794922, 53.59716796875, 55.02959442138672, 39.672996520996094, 47.405189514160156, 58.034217834472656, 36.05120849609375, 10.88400650024414, 29.930503845214844, 4.49029541015625, 20.628036499023438, 7.8242950439453125, 5.259130477905273, 45.27471160888672, 11.719329833984375, 59.07501220703125, 18.59735870361328, 31.71853256225586, 74.86827087402344, -1.2668533325195312, 27.91796875, 38.59765625, 8.803253173828125, 12.52187728881836, 17.7017822265625, 0.6282806396484375, 114.40155029296875, 0.7968177795410156, -0.7562122344970703, 44.54103088378906, 44.599388122558594, 51.963653564453125, 4.244747161865234, 19.594959259033203, 2.2620162963867188, 14.235885620117188, 33.144901275634766, 40.043304443359375, 8.376775741577148, 14.272842407226562, 48.40093231201172, 23.458438873291016, -1.229583740234375, 54.99388122558594, 19.987985610961914, 26.103469848632812, 12.096076965332031, 2.9363536834716797, 12.094093322753906, -5.43707275390625, 86.0145492553711, 12.49755859375, 64.58485412597656, 44.619998931884766, 11.410346984863281, 38.67854309082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000415.npy"} +{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 36.515438079833984, "std": 28.901609420776367, "min": -14.394332885742188, "p10": 1.2617874145507826, "median": 35.503929138183594, "p90": 72.17559127807617, "max": 114.41659545898438, "pos_frac": 0.921875, "sample": [37.108642578125, 12.880256652832031, 12.57366943359375, 67.62939453125, -14.394332885742188, 95.14813232421875, 32.80998229980469, 18.495643615722656, 50.648773193359375, 95.72159576416016, 37.86481475830078, 35.933937072753906, 37.771331787109375, 26.464717864990234, 54.214508056640625, 34.245582580566406, 11.132181167602539, -7.020683288574219, 72.41679382324219, 21.808696746826172, 35.87364196777344, 18.922767639160156, 9.656322479248047, 71.24767303466797, 71.61278533935547, 35.13421630859375, 45.168434143066406, 65.25686645507812, 0.3701934814453125, 43.8363037109375, 17.059112548828125, 36.851356506347656, 114.41659545898438, 4.2434234619140625, -2.9201316833496094, -10.003194808959961, 48.993553161621094, 65.65914916992188, 28.115074157714844, 23.692718505859375, 30.789993286132812, 109.18429565429688, 48.17771911621094, 18.352279663085938, 38.5374755859375, 41.1043586730957, 33.298057556152344, 77.9469985961914, 23.14158058166504, 17.190296173095703, 0.7447776794433594, 36.02497863769531, 8.3121337890625, 76.59806823730469, 20.529420852661133, -12.65887451171875, 65.06558227539062, 2.4681434631347656, 37.90983581542969, 23.373184204101562, 14.56120491027832, 45.46617889404297, 71.50856018066406, 52.72111511230469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000416.npy"} +{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 34.968475341796875, "std": 23.686065673828125, "min": -4.384225845336914, "p10": 3.187281608581546, "median": 37.111473083496094, "p90": 64.36595497131349, "max": 85.22840881347656, "pos_frac": 0.921875, "sample": [31.87544822692871, 36.15294647216797, 0.4079113006591797, 12.837432861328125, -0.4736976623535156, 39.12312316894531, 25.58349609375, 6.535272598266602, 9.654790878295898, 82.25772857666016, 25.815425872802734, 49.2850341796875, 14.381725311279297, 17.61892318725586, 47.21746826171875, 16.269065856933594, -4.384225845336914, 52.49406433105469, 54.773773193359375, 28.743377685546875, 49.14253234863281, 17.14093017578125, 39.573219299316406, 53.96482849121094, 53.01729965209961, 50.51478576660156, 56.13174819946289, 18.47240447998047, 64.69112396240234, 53.80946350097656, 6.293083190917969, 12.870330810546875, 16.343393325805664, 19.338523864746094, 36.25580596923828, 51.57643127441406, 63.0301628112793, 1.8842945098876953, 59.352203369140625, 37.967140197753906, -0.5571384429931641, 63.60722732543945, 42.25115966796875, 49.70534133911133, -3.7153778076171875, 75.88402557373047, 78.61431121826172, 10.92338752746582, 38.756805419921875, 40.653961181640625, 74.80122375488281, -2.2751731872558594, 46.48005294799805, 18.008567810058594, 31.269031524658203, 22.91986083984375, 6.2275848388671875, 49.726524353027344, 85.22840881347656, 70.64205169677734, 39.5189208984375, 47.461029052734375, 34.78584289550781, 9.526054382324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000417.npy"} +{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 34.140960693359375, "std": 32.01416778564453, "min": -41.79039001464844, "p10": 0.5435695648193363, "median": 29.066360473632812, "p90": 76.29382858276374, "max": 127.02761840820312, "pos_frac": 0.90625, "sample": [53.718238830566406, 1.2579498291015625, 46.1069450378418, 4.458225250244141, 26.494049072265625, 106.06590270996094, 4.126106262207031, 14.600055694580078, 48.31036376953125, 44.44011306762695, 32.56878662109375, -14.879035949707031, 59.65830993652344, 30.97515869140625, 91.20783996582031, 56.57200622558594, 107.19198608398438, -20.044174194335938, 26.764312744140625, 11.378128051757812, 10.972061157226562, 92.774658203125, 21.18340301513672, 18.572021484375, 0.4018974304199219, 22.31818389892578, 56.275909423828125, 52.46240234375, 27.157562255859375, 9.617408752441406, 54.619384765625, 15.133926391601562, 34.620521545410156, 38.489349365234375, 12.74359130859375, 20.032203674316406, -4.324743270874023, 42.595577239990234, 15.413162231445312, 60.805030822753906, 127.02761840820312, -6.578338623046875, 24.847938537597656, 47.85101318359375, 41.52225875854492, 51.27416229248047, 24.181861877441406, 44.204994201660156, 1.6973876953125, -41.79039001464844, 17.235763549804688, 44.51554870605469, 48.64166259765625, 0.8741378784179688, 52.637176513671875, 16.6455078125, 18.149253845214844, 93.77346801757812, 9.393318176269531, -2.572986602783203, 52.503021240234375, 82.931884765625, 48.57353210449219, 54.646934509277344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000418.npy"} +{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 33.94792938232422, "std": 24.165023803710938, "min": -14.420141220092773, "p10": 8.107709503173828, "median": 29.13538932800293, "p90": 65.22945251464844, "max": 97.59650421142578, "pos_frac": 0.953125, "sample": [53.706878662109375, -14.420141220092773, 40.1512451171875, 41.289146423339844, 56.43782043457031, 22.199840545654297, -3.0291404724121094, -2.8379249572753906, 3.093170166015625, 62.5048828125, 28.410842895507812, 8.766677856445312, 60.97093200683594, 46.578094482421875, 29.27573585510254, 32.68937301635742, 31.307960510253906, 24.905086517333984, 16.87329864501953, 38.85762023925781, 31.965763092041016, 55.48077392578125, 8.521759033203125, 46.515357971191406, 12.57843017578125, 7.930259704589844, 14.718399047851562, 22.428110122680664, 85.68733215332031, 72.27587890625, 44.15370178222656, 3.2654056549072266, 49.41323471069336, 13.904624938964844, 13.084854125976562, 34.805152893066406, 26.924118041992188, 54.349395751953125, 35.926536560058594, 16.4901123046875, 72.89852905273438, 27.874282836914062, 32.828125, 18.667003631591797, 12.628835678100586, 66.39712524414062, 28.99504280090332, 45.370208740234375, 97.59650421142578, 56.028785705566406, 78.16278076171875, 12.318046569824219, 10.519859313964844, 92.16516876220703, 59.20139694213867, 57.0279541015625, 26.37657928466797, 6.868705749511719, 10.415847778320312, 40.30818176269531, 16.68450927734375, 28.357681274414062, 27.16071319580078, 19.665084838867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000419.npy"} +{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 32.95982360839844, "std": 27.747848510742188, "min": -24.143829345703125, "p10": 3.983855056762696, "median": 28.494515419006348, "p90": 71.20892486572265, "max": 100.21612548828125, "pos_frac": 0.90625, "sample": [39.40443420410156, 31.43414306640625, 10.0330810546875, 22.98474884033203, 28.07733154296875, -14.400674819946289, 21.33448028564453, 9.53271484375, 52.54639434814453, 71.23894500732422, -2.2370452880859375, 33.03295135498047, 9.63015365600586, 3.616260528564453, 14.440277099609375, 20.443138122558594, 42.592041015625, 21.709362030029297, 13.80401611328125, 44.069671630859375, -13.755475997924805, 67.33219146728516, -9.342254638671875, 93.97154998779297, 19.069427490234375, 44.99053955078125, 49.05999755859375, 39.90703582763672, 19.138572692871094, 25.108482360839844, 35.57524871826172, 48.64945983886719, 64.06245422363281, 28.911699295043945, 35.52897644042969, 71.13887786865234, 10.818252563476562, 22.44631576538086, 43.715538024902344, 99.28727722167969, -1.250082015991211, 50.74273681640625, 73.63717651367188, 5.531776428222656, 50.58042907714844, 24.457874298095703, 76.94813537597656, 40.59735870361328, 69.43934631347656, 17.441314697265625, 18.344602584838867, -24.143829345703125, 5.3631134033203125, 45.40254592895508, 4.841575622558594, 100.21612548828125, 12.920196533203125, 29.198902130126953, 33.34544372558594, 20.92754364013672, 90.80769348144531, 22.86761474609375, 16.30010986328125, 56.01031494140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000420.npy"} +{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 27.947509765625, "std": 27.135541915893555, "min": -27.266998291015625, "p10": -2.39073028564453, "median": 26.37243938446045, "p90": 63.69434509277344, "max": 91.4513931274414, "pos_frac": 0.875, "sample": [53.27422332763672, 6.286285400390625, 31.142545700073242, 0.7112350463867188, 41.37700653076172, 29.71971893310547, 73.43354797363281, 10.085319519042969, 0.2431964874267578, 11.499553680419922, 40.62467956542969, 20.545291900634766, 56.23870849609375, 70.55010223388672, 43.049041748046875, 30.706146240234375, 61.81712341308594, -8.342735290527344, 27.017457962036133, 45.27935791015625, 91.36543273925781, 62.9639892578125, 42.34767150878906, 3.3840904235839844, 16.231369018554688, -1.1268081665039062, 37.5498046875, 19.396987915039062, 21.44355010986328, 29.473846435546875, 29.497459411621094, 38.621543884277344, 86.91244506835938, 49.67622375488281, 9.061763763427734, 3.273853302001953, 35.79250717163086, 17.348976135253906, -27.266998291015625, 91.4513931274414, 1.150146484375, -6.5229034423828125, 64.00735473632812, 85.74932861328125, 23.891799926757812, 39.019195556640625, 16.18622589111328, 16.9337215423584, 6.228288650512695, 18.10564422607422, 25.727420806884766, 20.58045196533203, 44.22382354736328, -15.02252197265625, -18.04204559326172, 34.84965133666992, 42.814064025878906, 3.8065223693847656, -10.770965576171875, 46.27020263671875, 2.091012954711914, -2.9324111938476562, 15.419792175292969, 32.21990966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000421.npy"} +{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 30.136600494384766, "std": 28.6448917388916, "min": -15.378921508789062, "p10": -5.154238891601561, "median": 26.464195251464844, "p90": 67.8453437805176, "max": 114.09463500976562, "pos_frac": 0.859375, "sample": [55.75636291503906, 63.789886474609375, 80.07323455810547, 38.915443420410156, 40.63671875, -5.8149566650390625, 2.7022857666015625, 76.18350219726562, 31.359716415405273, 114.09463500976562, -2.2789154052734375, 50.65376281738281, -3.6125640869140625, 11.365402221679688, 17.891773223876953, 11.645477294921875, 28.0286922454834, -14.784767150878906, 15.652446746826172, 58.67463684082031, 27.340660095214844, 108.43182373046875, 17.09386444091797, -6.067869186401367, 56.959781646728516, 53.75121307373047, 16.912948608398438, 14.244865417480469, 12.439641952514648, 36.64433288574219, 75.90406799316406, 70.96516418457031, 16.97900390625, 13.763481140136719, 6.838817596435547, 27.8364200592041, 69.5833969116211, -10.491279602050781, 30.393081665039062, -11.684135437011719, 0.8183937072753906, 30.80963897705078, 25.587730407714844, 0.05438232421875, 8.345806121826172, 55.53066635131836, 39.937255859375, 53.93933868408203, 20.071311950683594, 15.011331558227539, 48.24403762817383, 20.46435546875, 16.962310791015625, 23.06119155883789, 49.73597717285156, 49.06658935546875, 19.694091796875, -15.378921508789062, 49.874481201171875, 47.78181457519531, -9.754955291748047, 1.1322174072265625, 48.60662078857422, 30.37470245361328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000422.npy"} +{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 31.304004669189453, "std": 29.750967025756836, "min": -28.430749893188477, "p10": -2.4560897827148436, "median": 31.01981544494629, "p90": 74.03501815795902, "max": 104.8265151977539, "pos_frac": 0.875, "sample": [1.740325927734375, 43.887977600097656, -6.989715576171875, 35.175262451171875, -11.209182739257812, -9.306709289550781, 64.70616912841797, -5.042655944824219, 48.18180847167969, 44.70512390136719, 81.20259857177734, 10.69666862487793, 27.924758911132812, 36.521331787109375, 55.231502532958984, -6.071319580078125, 102.2619857788086, 104.8265151977539, 60.7161865234375, 46.36265182495117, 10.499099731445312, 4.3674468994140625, 47.705535888671875, 41.4913330078125, 78.03309631347656, 13.191925048828125, 20.830232620239258, 3.8342819213867188, -28.430749893188477, 2.5222625732421875, 26.124055862426758, 33.09950637817383, 15.222740173339844, 50.37500762939453, 101.71279907226562, 55.99540710449219, 9.061929702758789, 50.735679626464844, -2.474893569946289, 31.741836547851562, 31.584075927734375, 50.54054260253906, 32.870452880859375, 16.649662017822266, 99.930908203125, 30.90761947631836, 11.944061279296875, 8.461463928222656, 47.41911315917969, 10.900039672851562, 11.34725570678711, 13.438892364501953, 11.960845947265625, 47.41365051269531, 44.80152130126953, 31.13201141357422, -2.4122142791748047, 20.295557022094727, 21.442813873291016, 1.515899658203125, 47.259056091308594, 8.529853820800781, 34.530426025390625, 79.83309173583984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000423.npy"} +{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 34.16087341308594, "std": 30.783992767333984, "min": -15.3675537109375, "p10": -0.4809219360351554, "median": 30.520843505859375, "p90": 83.20059051513675, "max": 119.94644165039062, "pos_frac": 0.890625, "sample": [-9.975234985351562, 86.44996643066406, 34.36487579345703, 11.39616584777832, -0.9906387329101562, 32.41616439819336, 46.614532470703125, 57.85649871826172, 21.63819122314453, -6.73529052734375, 50.77696228027344, 24.28466796875, 40.07073974609375, 5.478700637817383, 34.80463790893555, 108.66712951660156, 23.55077362060547, 50.385345458984375, 27.665315628051758, 41.412208557128906, 48.54346466064453, 16.7768497467041, 98.77799987792969, 28.173255920410156, 45.94399642944336, -4.006927490234375, 0.408935546875, 61.193817138671875, -4.520542144775391, 33.20451736450195, 9.087392807006836, 102.75807189941406, 14.02560043334961, 20.216201782226562, 30.777023315429688, 52.18864440917969, 75.61871337890625, -15.3675537109375, 15.41912841796875, 25.671810150146484, 15.716047286987305, 26.075546264648438, -0.8622894287109375, 12.986930847167969, 32.828582763671875, 25.486976623535156, 34.4329833984375, 0.4185600280761719, 4.3088226318359375, 119.94644165039062, 24.96880340576172, 32.76499938964844, 30.264663696289062, 35.259483337402344, 92.97138977050781, 11.200458526611328, 38.94160461425781, 5.4762420654296875, 18.829374313354492, 115.4595947265625, 46.357627868652344, 51.80231857299805, 42.84865951538086, 32.79005432128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000424.npy"} +{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 30.633705139160156, "std": 25.08201789855957, "min": -31.212093353271484, "p10": -0.8593372344970671, "median": 31.576122283935547, "p90": 61.771606063842775, "max": 103.90589141845703, "pos_frac": 0.890625, "sample": [-2.193584442138672, 2.5322799682617188, 50.553794860839844, -31.212093353271484, -11.697219848632812, 52.913421630859375, 20.492088317871094, 46.01338195800781, 31.149566650390625, 49.507659912109375, 12.780891418457031, 53.98347473144531, 5.053436279296875, 5.23358154296875, 28.963565826416016, 47.14543151855469, -2.28668212890625, 12.37701416015625, 33.637298583984375, 2.25390625, 39.917510986328125, 39.74348449707031, 8.77877426147461, 65.48652648925781, 64.06346130371094, -7.559608459472656, 30.778362274169922, 20.34807586669922, 61.99441909790039, 66.05096435546875, 27.039772033691406, 44.47015380859375, 42.586753845214844, 9.134162902832031, 48.90362548828125, 20.316619873046875, 35.71492004394531, 21.769378662109375, 15.345794677734375, 61.251708984375, 7.191577911376953, 57.66923522949219, 36.645172119140625, -4.5494384765625, 44.546104431152344, 58.448699951171875, 12.676807403564453, 44.25170135498047, 15.184562683105469, 103.90589141845703, 32.00267791748047, 50.30718231201172, 2.5753097534179688, 8.82513427734375, 14.522346496582031, 44.39326477050781, 44.03451156616211, 73.03329467773438, 37.51032257080078, 65.10896301269531, 18.92487335205078, -8.464378356933594, 54.201499938964844, 24.27582550048828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000425.npy"} +{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 33.87653350830078, "std": 26.95366096496582, "min": -29.2664794921875, "p10": 3.732493019104005, "median": 31.242584228515625, "p90": 70.53075485229493, "max": 98.32042694091797, "pos_frac": 0.921875, "sample": [20.487497329711914, 12.655784606933594, 12.451126098632812, 31.65283966064453, 19.02685546875, 27.263214111328125, 19.306121826171875, 46.22502899169922, 64.6983413696289, 31.348388671875, 31.13677978515625, -0.34667205810546875, 34.56856918334961, 3.391469955444336, 54.37407684326172, 20.082164764404297, 22.866182327270508, 46.63593292236328, 26.521570205688477, 17.57146453857422, -3.7211990356445312, 62.10078430175781, 4.5282135009765625, 30.969451904296875, 22.843978881835938, 26.471223831176758, 48.44784927368164, -10.746471405029297, 90.95045471191406, 85.03638458251953, 23.275943756103516, 98.32042694091797, 28.025108337402344, 18.63653564453125, 41.86894226074219, 89.17823028564453, 10.669326782226562, -25.995223999023438, 34.96440887451172, 88.6927719116211, 12.975845336914062, 37.49473571777344, 84.92774963378906, 53.611167907714844, 56.01831817626953, 19.17953109741211, 57.667625427246094, 72.45451354980469, 0.27800750732421875, 25.085102081298828, 37.60785675048828, 53.989990234375, 19.75049591064453, 34.44711685180664, 39.069583892822266, 42.164573669433594, 11.473989486694336, 66.04198455810547, 33.51884460449219, -29.2664794921875, 34.75926971435547, 50.95924377441406, 13.064468383789062, 34.39088439941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000426.npy"} +{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 24.72673988342285, "std": 23.6002254486084, "min": -45.89013671875, "p10": 1.2410911560058606, "median": 18.56185531616211, "p90": 55.65249252319336, "max": 87.18797302246094, "pos_frac": 0.921875, "sample": [6.184356689453125, 11.921085357666016, 54.65080261230469, 77.97955322265625, 59.4998779296875, 51.506996154785156, 29.52445411682129, -3.9144744873046875, 51.08489990234375, 51.690391540527344, 9.496002197265625, 7.7047271728515625, -8.995254516601562, 12.734626770019531, 31.76300048828125, 27.29827880859375, 72.1010971069336, 62.07603454589844, 32.14801025390625, -45.89013671875, 14.473037719726562, 12.086090087890625, 35.086822509765625, 35.34297180175781, 13.098701477050781, 21.7469482421875, 16.29608154296875, 87.18797302246094, 18.437332153320312, 6.353153228759766, 31.258834838867188, 55.80292510986328, 40.18914031982422, 10.129941940307617, 9.14541244506836, 42.634647369384766, 6.949943542480469, 5.886684417724609, 19.453323364257812, -1.1329269409179688, 48.182151794433594, 22.37373161315918, 5.77630615234375, 46.86784744262695, 2.3616561889648438, 10.41201400756836, 30.222293853759766, 59.38602066040039, 0.697113037109375, 3.4302730560302734, 40.21788024902344, 15.052474975585938, 15.056900024414062, 18.686378479003906, 26.00601577758789, 15.932174682617188, 0.7608489990234375, 43.58116912841797, 55.301483154296875, -1.7167816162109375, 7.357112884521484, 12.520875930786133, 27.50830841064453, 5.545692443847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000427.npy"} +{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 35.59562683105469, "std": 27.30299186706543, "min": -15.609573364257812, "p10": 4.78185272216797, "median": 31.157679557800293, "p90": 69.23292694091796, "max": 124.00845336914062, "pos_frac": 0.96875, "sample": [5.82342529296875, 7.812587738037109, 31.353124618530273, 38.4456787109375, 124.00845336914062, 28.174636840820312, 8.452709197998047, 51.752197265625, 44.177650451660156, 47.568572998046875, 26.19903564453125, 8.748245239257812, 88.82743835449219, 13.153305053710938, 105.97998809814453, -7.21942138671875, -15.609573364257812, 69.23786163330078, 16.815597534179688, 45.112701416015625, 51.95219421386719, 30.838470458984375, 17.73939323425293, 37.44672775268555, 37.676177978515625, 91.71904754638672, 29.600173950195312, 19.633010864257812, 15.188617706298828, 50.50038528442383, 43.128456115722656, 15.714553833007812, 31.353729248046875, 17.14545249938965, 74.93199920654297, 2.1648731231689453, 29.91388702392578, 52.616241455078125, 7.166065216064453, 3.770191192626953, 42.89833068847656, 30.962234497070312, 26.668067932128906, 29.27007293701172, 10.337379455566406, 51.52227020263672, 2.287506103515625, 30.27283477783203, 57.412628173828125, 37.58557891845703, 12.622547149658203, 47.15192413330078, 69.2214126586914, 4.3354644775390625, 64.22328186035156, 42.697532653808594, 51.426055908203125, 80.5297622680664, 46.10661315917969, 18.827014923095703, 66.14675903320312, 1.3312911987304688, 16.35430908203125, 38.915428161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000428.npy"} +{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 29.67691421508789, "std": 29.05752182006836, "min": -25.14111328125, "p10": -1.7938974380493165, "median": 23.155555725097656, "p90": 68.45147705078125, "max": 120.18927001953125, "pos_frac": 0.859375, "sample": [21.033653259277344, 32.648681640625, 11.255931854248047, 68.51409912109375, -5.105796813964844, 38.503211975097656, 4.454353332519531, 20.64923667907715, 46.9320068359375, 21.686309814453125, -7.422950744628906, 42.71531677246094, -1.7742080688476562, 11.774358749389648, 3.9075775146484375, 60.98003005981445, 51.529571533203125, 3.8551101684570312, 120.18927001953125, 18.629241943359375, 77.25743103027344, 93.90151977539062, 1.073699951171875, 51.408233642578125, 17.715713500976562, 4.701763153076172, 71.42711639404297, 21.328044891357422, 62.791595458984375, 34.096412658691406, -8.983531951904297, 24.673751831054688, 82.0224838256836, -25.14111328125, 8.531181335449219, -1.7086639404296875, -10.709892272949219, 24.624801635742188, 31.292360305786133, 13.462739944458008, 19.61901092529297, 40.87310791015625, 15.542579650878906, 18.324081420898438, 79.48159790039062, 12.205413818359375, 61.19603729248047, 7.841850280761719, 11.643718719482422, -19.660308837890625, 29.172393798828125, 47.90516662597656, 54.463539123535156, 68.30535888671875, 9.489355087280273, 5.1867218017578125, -1.8023357391357422, 32.82576370239258, 55.328956604003906, 33.17871856689453, 50.414188385009766, 34.404815673828125, 50.724082946777344, 43.93797302246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000429.npy"} +{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 30.790908813476562, "std": 26.75757598876953, "min": -11.660322189331055, "p10": -3.097039031982418, "median": 25.939796447753906, "p90": 66.58873291015625, "max": 106.94229125976562, "pos_frac": 0.890625, "sample": [50.4027099609375, 10.039810180664062, 43.908966064453125, -6.723560333251953, 73.505615234375, 18.81192398071289, 24.3577880859375, 21.525775909423828, 18.383649826049805, 32.507118225097656, 2.2525291442871094, 18.138397216796875, 5.7283172607421875, 25.025848388671875, 20.696380615234375, 51.087249755859375, -10.22750473022461, 40.128997802734375, 24.555862426757812, 6.878765106201172, 14.362213134765625, -4.744239807128906, 32.06996536254883, 40.958805084228516, 10.546846389770508, 10.316314697265625, 34.913421630859375, 54.56292724609375, 0.746429443359375, 43.787757873535156, 21.507638931274414, -10.768903732299805, 70.16326904296875, 33.262306213378906, 19.41364288330078, 44.59248352050781, 7.511314392089844, -11.357147216796875, 9.164228439331055, 37.74818420410156, 58.72657775878906, 20.571632385253906, 51.440513610839844, 24.906455993652344, 26.853744506835938, 89.54547119140625, 44.23350524902344, 34.59205627441406, 67.14874267578125, 10.691864013671875, 31.003231048583984, 19.843143463134766, 65.28204345703125, 34.48004150390625, -11.660322189331055, 50.861724853515625, 83.19566345214844, -8.0867919921875, 57.014373779296875, 106.94229125976562, 99.24786376953125, 37.63526916503906, 31.72270965576172, 14.684200286865234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000430.npy"} +{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 34.62909698486328, "std": 26.41916847229004, "min": -6.287508010864258, "p10": 2.4207304000854495, "median": 30.567895889282227, "p90": 76.36001129150391, "max": 109.81414794921875, "pos_frac": 0.953125, "sample": [2.0958251953125, 30.039905548095703, 4.787708282470703, 50.316192626953125, 39.71757507324219, 13.594406127929688, 77.47158813476562, 20.72716522216797, 11.510429382324219, 32.934593200683594, 4.5437164306640625, 39.162940979003906, 2.698484420776367, 1.4803352355957031, 2.3016929626464844, 8.065078735351562, 2.2085037231445312, 3.6968231201171875, 12.433418273925781, -0.9259452819824219, 39.53178405761719, 39.18159484863281, 50.109283447265625, 74.41464233398438, 11.055660247802734, 31.09588623046875, 77.19374084472656, 38.278377532958984, 78.0201416015625, 60.23112487792969, 73.08709716796875, 23.856346130371094, 58.902435302734375, 54.3603630065918, 43.220733642578125, -2.4367523193359375, 109.81414794921875, 8.474632263183594, 56.946205139160156, 31.226898193359375, 79.97989654541016, 51.5579833984375, 49.2711181640625, -6.287508010864258, 23.869873046875, 23.81633758544922, 80.34170532226562, 18.13408851623535, 20.023256301879883, 53.930137634277344, 61.294464111328125, 27.025039672851562, 21.58679962158203, 40.603187561035156, 26.57027244567871, 55.317298889160156, 21.592676162719727, 17.729217529296875, 57.004913330078125, 21.638322830200195, 82.26618957519531, 19.180130004882812, 10.306499481201172, 44.08558654785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000431.npy"} +{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 32.3117790222168, "std": 27.88199234008789, "min": -25.693618774414062, "p10": -0.7496078491210927, "median": 29.696426391601562, "p90": 65.77025909423828, "max": 124.13148498535156, "pos_frac": 0.890625, "sample": [34.470306396484375, 13.55820083618164, 28.74871826171875, 15.525211334228516, 35.254180908203125, -1.2584152221679688, 16.566818237304688, 22.994953155517578, 24.31955337524414, 3.0241012573242188, -3.859264373779297, -1.2264671325683594, 49.665740966796875, 60.37053680419922, 37.797306060791016, 124.13148498535156, 30.251388549804688, 43.808990478515625, 30.452232360839844, -8.784904479980469, 4.031654357910156, 26.979415893554688, 66.16645812988281, 24.750900268554688, 36.366607666015625, 77.93799591064453, 29.141464233398438, 0.3630638122558594, 31.15704345703125, 21.00554656982422, 27.552505493164062, 7.322090148925781, 35.262203216552734, 31.575366973876953, 4.74664306640625, 105.49640655517578, 40.02297592163086, -25.693618774414062, 38.63201904296875, 24.66912841796875, 7.948814392089844, -2.1251373291015625, 64.84579467773438, 20.423324584960938, 62.09638214111328, 49.23206329345703, 31.584701538085938, 82.427490234375, 47.89811706542969, -4.04913330078125, 18.886917114257812, 59.38505554199219, 23.44601821899414, 11.171974182128906, 38.603965759277344, 24.169116973876953, 100.19037628173828, 11.330028533935547, 50.630706787109375, 50.66046142578125, 12.702995300292969, 42.15138244628906, 32.13207244873047, 68.91383361816406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000432.npy"} +{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 28.365665435791016, "std": 26.73403549194336, "min": -26.01568603515625, "p10": -3.5823831558227535, "median": 25.44792652130127, "p90": 66.77043685913087, "max": 93.929443359375, "pos_frac": 0.84375, "sample": [9.499252319335938, 46.83619689941406, 76.94300842285156, 32.33671569824219, 44.312442779541016, -2.7405052185058594, 18.331193923950195, -10.618701934814453, 66.00806427001953, 49.02989196777344, 3.66290283203125, -3.7364120483398438, 93.929443359375, 28.767868041992188, 22.850631713867188, 67.50007629394531, 40.61976623535156, 66.30494689941406, 54.311004638671875, 16.7535400390625, -3.222982406616211, 39.62146759033203, 41.368919372558594, 29.588157653808594, 21.164106369018555, 25.570587158203125, 12.891960144042969, 40.324928283691406, 27.932418823242188, 36.73704147338867, 3.03961181640625, 35.750396728515625, -7.607574462890625, 17.122314453125, 5.0133514404296875, 49.02735900878906, 66.96993255615234, -6.1447296142578125, 16.076080322265625, 6.761486053466797, 92.84504699707031, 55.36456298828125, 24.184593200683594, -10.288955688476562, -6.9521484375, 25.325265884399414, 25.266944885253906, 17.64801788330078, 11.519519805908203, -26.01568603515625, 71.58280944824219, 1.7373809814453125, 31.045372009277344, 2.048614501953125, 10.052156448364258, 9.280052185058594, 43.725982666015625, 31.71540069580078, -1.3043270111083984, 82.53628540039062, 43.927772521972656, 34.839263916015625, 49.00421142578125, 17.428237915039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000433.npy"} +{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 29.513980865478516, "std": 25.28011131286621, "min": -11.158075332641602, "p10": -0.0903049468994126, "median": 24.724658966064453, "p90": 57.471968078613294, "max": 113.81451416015625, "pos_frac": 0.890625, "sample": [58.908851623535156, 48.097633361816406, 113.81451416015625, 1.5989341735839844, -7.2209930419921875, 4.5305938720703125, 33.25751495361328, 20.99590301513672, -11.158075332641602, -0.6878433227539062, 38.03692626953125, 54.119239807128906, 21.32206153869629, 86.17977905273438, 17.99686050415039, 1.3039512634277344, 23.07006072998047, 14.00164794921875, 31.89281463623047, 87.32478332519531, 10.561233520507812, 24.534671783447266, 43.711456298828125, 8.198829650878906, 21.846572875976562, 37.7049560546875, 20.729623794555664, 20.858625411987305, 27.095048904418945, 12.906028747558594, 22.678451538085938, 32.549903869628906, 35.205650329589844, -6.731048583984375, 34.08345031738281, 37.7464485168457, -2.150543212890625, 8.443483352661133, 42.76666259765625, 50.2147102355957, 6.189079284667969, 71.67816925048828, 47.68682861328125, -5.463951110839844, 32.784568786621094, 31.991031646728516, 34.598175048828125, 24.91464614868164, 27.215190887451172, 64.54306030273438, 48.68803405761719, 10.850461959838867, 42.24580764770508, 18.78122329711914, 19.513160705566406, 17.711875915527344, 44.18301773071289, 41.804779052734375, 33.70885467529297, 23.538978576660156, 102.3184585571289, 12.036386489868164, 18.83014678955078, -1.792520523071289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000434.npy"} +{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 32.3472900390625, "std": 29.603843688964844, "min": -36.06764221191406, "p10": 1.6940469741821302, "median": 27.134838104248047, "p90": 77.46041717529297, "max": 114.38360595703125, "pos_frac": 0.9375, "sample": [78.39309692382812, -4.922710418701172, 13.5562744140625, 43.68182373046875, 62.89335632324219, 35.50236129760742, 6.357126235961914, 23.235565185546875, 73.82659912109375, 27.17656707763672, 69.47360229492188, 35.903900146484375, 4.7804107666015625, 56.39757537841797, 24.27703857421875, 80.4052734375, 35.995033264160156, 14.330520629882812, 30.163421630859375, 1.1572189331054688, -4.286964416503906, 14.00592041015625, 0.18109130859375, 25.12176513671875, 0.7306327819824219, 30.703266143798828, 7.970676422119141, 27.093109130859375, -10.5802001953125, 9.184904098510742, 41.98968505859375, 2.946645736694336, 37.93895721435547, 29.677490234375, 49.489341735839844, 46.359683990478516, 58.434059143066406, -36.06764221191406, 4.7152099609375, 8.657344818115234, 15.540283203125, 7.487266540527344, 79.74098205566406, 8.667518615722656, 43.424903869628906, 83.30552673339844, 75.28416442871094, 56.06111145019531, 14.833946228027344, 4.022409439086914, 114.38360595703125, 18.596860885620117, 43.64385223388672, 17.927688598632812, 80.58209991455078, 96.71363830566406, 9.792259216308594, 16.20379638671875, 12.00650405883789, 58.46330261230469, 29.10842514038086, 21.21533966064453, 33.75677490234375, 72.61519622802734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000435.npy"} +{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 31.527891159057617, "std": 24.287145614624023, "min": -6.603973388671875, "p10": 1.821120834350588, "median": 32.005001068115234, "p90": 62.72529678344727, "max": 122.19950866699219, "pos_frac": 0.921875, "sample": [-0.4515380859375, -6.603973388671875, -5.0181121826171875, 32.56963348388672, 60.818641662597656, 44.77760314941406, 13.8759765625, 49.52922058105469, 21.730712890625, 7.0438690185546875, 37.27751159667969, 75.52275085449219, 16.699569702148438, 11.448633193969727, 49.097564697265625, 37.18323516845703, 0.9489631652832031, 32.9471435546875, 50.88506317138672, -4.935699462890625, 23.214187622070312, 35.55812072753906, 44.16114807128906, 42.413536071777344, 74.85223388671875, 33.730369567871094, 50.2357177734375, 35.854827880859375, 16.27446746826172, 38.720123291015625, 91.10537719726562, 11.6226806640625, 122.19950866699219, 21.760269165039062, 63.54243469238281, 12.868877410888672, 14.943367004394531, 35.757102966308594, 39.046688079833984, 27.137237548828125, 68.86053466796875, 11.28714370727539, 25.269439697265625, 12.368247985839844, 42.835060119628906, 32.882484436035156, 12.07748794555664, 0.32569122314453125, 34.639583587646484, 15.443733215332031, 25.89087677001953, 30.304054260253906, 12.321487426757812, 41.368927001953125, 65.08492279052734, 12.694747924804688, 49.76757049560547, 15.70241928100586, 3.8561553955078125, 31.44036865234375, -3.958375930786133, 46.15394973754883, 47.767822265625, 23.05567169189453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000436.npy"} +{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 28.168094635009766, "std": 23.734500885009766, "min": -14.482528686523438, "p10": -3.9695531845092753, "median": 25.354019165039062, "p90": 61.31471595764161, "max": 75.3101806640625, "pos_frac": 0.875, "sample": [14.98736572265625, -2.0578765869140625, 15.414596557617188, 27.393217086791992, 16.520965576171875, -14.092733383178711, 43.466888427734375, 34.355186462402344, 25.2861328125, 0.0396881103515625, 7.371097564697266, 23.358478546142578, -5.55535888671875, 69.57962799072266, 15.078804016113281, 32.41966247558594, 66.69471740722656, -12.016210556030273, -8.270149230957031, 55.79638671875, 14.346923828125, 52.688018798828125, 23.29298973083496, 38.46788787841797, 11.293342590332031, 75.3101806640625, 13.851371765136719, 9.071640014648438, 66.41587829589844, 17.797378540039062, 61.93086242675781, 50.69013977050781, 63.06788635253906, 1.6590118408203125, 53.19316101074219, -14.482528686523438, 29.846200942993164, 9.302911758422852, 59.87704086303711, 25.421905517578125, 37.310325622558594, 22.186691284179688, 30.5932559967041, 54.57522964477539, 59.429039001464844, 8.908451080322266, 57.40985107421875, 12.599067687988281, 39.581756591796875, 44.28858947753906, -4.788843154907227, 2.244171142578125, 42.92498779296875, -10.46490478515625, 16.706459045410156, 67.19384765625, 35.80621337890625, 40.74253845214844, 16.630775451660156, 46.75624084472656, 48.679500579833984, 16.046485900878906, 31.118228912353516, 17.4674072265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000437.npy"} +{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 34.36668395996094, "std": 27.218605041503906, "min": -15.934722900390625, "p10": 5.981065368652344, "median": 25.980530738830566, "p90": 71.68935546875001, "max": 103.27169799804688, "pos_frac": 0.9375, "sample": [10.868288040161133, 24.667009353637695, 51.63307189941406, 15.72344970703125, 12.778104782104492, 18.184524536132812, 91.72218322753906, -3.8825931549072266, 22.944053649902344, 14.903854370117188, 23.73009490966797, 19.383323669433594, 32.5223388671875, 16.533004760742188, 75.20478820800781, 31.69970703125, 65.33010864257812, 62.636558532714844, 21.73788833618164, 6.500083923339844, 17.114295959472656, 67.60116577148438, 58.44978332519531, 84.92115020751953, 36.44465637207031, 30.38806915283203, 35.73743438720703, 56.4913330078125, -15.934722900390625, 5.758628845214844, 10.471258163452148, 6.995353698730469, 56.48184585571289, 49.65130615234375, 77.93753051757812, 17.649253845214844, 17.206499099731445, 16.09076690673828, 53.38859558105469, 60.35980224609375, 103.27169799804688, 72.834716796875, 8.091962814331055, 68.11286926269531, -8.261810302734375, 13.798171997070312, 32.80424118041992, 34.11225891113281, 27.294052124023438, 58.83025360107422, 18.63492202758789, 69.016845703125, 62.955039978027344, 23.14519500732422, 0.12787818908691406, 5.621824264526367, 15.869636535644531, 11.885643005371094, 35.152061462402344, 59.79057693481445, 24.1658935546875, 81.96826934814453, -5.2573699951171875, 27.479087829589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000438.npy"} +{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 26.97458839416504, "std": 24.870941162109375, "min": -20.33409881591797, "p10": 1.1402645111083987, "median": 24.122360229492188, "p90": 62.87066078186036, "max": 98.43869018554688, "pos_frac": 0.90625, "sample": [41.52153778076172, 93.10154724121094, 8.747631072998047, 45.0307731628418, -3.0170650482177734, 7.4014739990234375, 27.167823791503906, 57.455047607421875, 34.21990203857422, 32.86578369140625, 13.277645111083984, 41.10557556152344, 15.678466796875, 1.4432754516601562, 12.978195190429688, 31.226835250854492, 27.486528396606445, 3.36309814453125, 20.574356079101562, 9.40667724609375, 7.825239181518555, 64.50342559814453, 34.31245422363281, 4.607452392578125, 39.23827362060547, -6.8011474609375, 28.18114471435547, 32.14352798461914, -1.198953628540039, 18.03004264831543, 66.97236633300781, 6.922187805175781, 37.698883056640625, 31.339061737060547, 98.43869018554688, 43.20857238769531, 61.84715270996094, 69.0189208984375, 31.439064025878906, 29.17359161376953, 4.6746978759765625, -18.138206481933594, 15.216934204101562, 7.812187194824219, 61.15814971923828, 8.965778350830078, 31.43310546875, 22.81470489501953, 20.894241333007812, 25.430015563964844, 75.62263488769531, 17.687007904052734, 8.65423583984375, 49.72113037109375, 7.94450569152832, 28.452951431274414, 63.30930709838867, 22.123695373535156, -4.067478179931641, 4.9058380126953125, 53.193092346191406, 19.953807830810547, -20.33409881591797, 1.0104026794433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000439.npy"} +{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 25.880369186401367, "std": 27.163881301879883, "min": -22.038894653320312, "p10": -6.573911285400387, "median": 19.851463317871094, "p90": 65.4570785522461, "max": 114.24848937988281, "pos_frac": 0.859375, "sample": [38.6461067199707, 11.1190185546875, 50.32643127441406, 45.16130065917969, 53.6573486328125, 20.079452514648438, 26.770225524902344, 32.51551055908203, 19.194385528564453, 71.62940216064453, 18.018522262573242, 18.172809600830078, 69.30165100097656, 5.384208679199219, 35.603912353515625, 4.59014892578125, 30.166034698486328, -18.765609741210938, 7.766563415527344, 14.698997497558594, 19.125518798828125, 44.39972686767578, 29.615936279296875, 19.330215454101562, 28.62622833251953, 22.450668334960938, -17.187204360961914, -1.9698028564453125, 8.408935546875, 5.118276596069336, 37.034423828125, 79.80740356445312, -13.463409423828125, 33.37590026855469, 16.213586807250977, 14.964981079101562, 63.4764404296875, -22.038894653320312, 22.919204711914062, 3.126462936401367, 53.27679443359375, 20.90959930419922, 81.27824401855469, 59.476287841796875, 19.62347412109375, 16.870643615722656, 25.77025604248047, 6.397010803222656, 44.04447937011719, 66.30592346191406, 21.51233673095703, 114.24848937988281, 12.995002746582031, 17.148157119750977, 8.479337692260742, 71.16573333740234, 10.045093536376953, -13.141487121582031, -8.270912170410156, -2.6142425537109375, 39.63904571533203, 53.514923095703125, -10.9967041015625, 1.2951507568359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000440.npy"} +{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 30.808921813964844, "std": 24.265342712402344, "min": -39.81626892089844, "p10": 3.58338966369629, "median": 28.79450035095215, "p90": 58.299786758422854, "max": 102.75078582763672, "pos_frac": 0.9375, "sample": [39.48384094238281, 2.800809860229492, 20.08026123046875, 58.893653869628906, 40.38323211669922, 49.47669219970703, 24.71617889404297, 24.349349975585938, 44.391578674316406, 8.050308227539062, 11.311946868896484, 30.72649574279785, 36.9643440246582, 56.91409683227539, 17.617435455322266, 50.540679931640625, 11.609132766723633, 21.879898071289062, 13.474960327148438, 49.6287841796875, 1.2844581604003906, 3.3216552734375, 28.670013427734375, 74.93307495117188, 50.98735046386719, 20.85778045654297, 8.322189331054688, 53.941261291503906, 20.035125732421875, 102.75078582763672, 7.619283676147461, -8.227752685546875, 37.97216796875, 33.335166931152344, 29.995361328125, 30.98713493347168, 4.194103240966797, 49.80591583251953, 67.19628143310547, 17.7442626953125, 31.584793090820312, 27.817337036132812, -2.9727020263671875, -39.81626892089844, 51.24473571777344, 28.525100708007812, 34.82912826538086, 83.41060638427734, 38.28179931640625, 20.035621643066406, 12.269241333007812, 15.277130126953125, 50.85755920410156, 27.273849487304688, 14.608711242675781, 32.581451416015625, 68.86366271972656, 24.783233642578125, 28.227088928222656, 86.18128967285156, 28.918987274169922, -7.490680694580078, 35.3313102722168, 32.12886047363281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000441.npy"} +{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 37.83577346801758, "std": 26.97312355041504, "min": -12.356636047363281, "p10": 9.353521347045898, "median": 33.585201263427734, "p90": 71.43325958251954, "max": 102.5697021484375, "pos_frac": 0.96875, "sample": [31.666297912597656, 19.956409454345703, 59.29777145385742, 1.1578521728515625, 45.597801208496094, 34.541351318359375, 11.035247802734375, 38.56797790527344, 22.547256469726562, 34.13430404663086, 14.167095184326172, 72.10831451416016, 44.39386749267578, 52.7110595703125, 42.477142333984375, 85.3658218383789, 69.8581314086914, 13.93209457397461, 18.305923461914062, 35.17184066772461, 9.646438598632812, 60.52250671386719, 16.677438735961914, 64.62422180175781, 16.242918014526367, 51.748600006103516, 90.66539001464844, 16.27637481689453, 60.77032470703125, 55.742835998535156, 33.866416931152344, 14.57762336730957, 11.31863784790039, 1.7699165344238281, 25.95075225830078, 60.7698974609375, -5.877708435058594, 100.75370025634766, 45.08964157104492, 61.61546325683594, 19.548263549804688, 32.22132873535156, 34.801422119140625, 9.227985382080078, 20.336639404296875, 82.2606430053711, 66.96455383300781, 28.81085205078125, 96.5226821899414, 35.68824005126953, 30.806922912597656, 5.4709014892578125, -12.356636047363281, 33.303985595703125, 16.190635681152344, 1.6237335205078125, 51.86650848388672, 18.42931365966797, 102.5697021484375, 24.40240478515625, 65.13198852539062, 19.83330535888672, 31.56098175048828, 66.52816772460938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000442.npy"} +{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 28.547142028808594, "std": 27.842365264892578, "min": -28.24059295654297, "p10": -6.608889007568359, "median": 27.326040267944336, "p90": 63.84203567504884, "max": 101.35150909423828, "pos_frac": 0.828125, "sample": [101.35150909423828, 8.97713851928711, 64.98435974121094, 4.3276214599609375, 26.153640747070312, -5.7119598388671875, 34.59135437011719, -0.3467864990234375, 42.088932037353516, 27.805578231811523, 2.002044677734375, 33.968963623046875, 26.84650230407715, 17.507369995117188, 18.481239318847656, -17.89244842529297, 36.587860107421875, -8.396026611328125, 74.3504409790039, 30.212556838989258, 74.33384704589844, 76.36013793945312, 54.135215759277344, 39.5925178527832, -28.24059295654297, 21.15850830078125, 49.888214111328125, 23.383827209472656, 45.73343276977539, 40.19651794433594, 38.69329833984375, 46.48197937011719, -0.3098602294921875, 8.087310791015625, 15.130390167236328, 14.616386413574219, 32.76776885986328, 56.593658447265625, -16.16509246826172, -6.8787384033203125, -5.979240417480469, -8.927192687988281, 49.1151123046875, 30.411510467529297, 2.012836456298828, 2.1031646728515625, 48.96952819824219, 16.511428833007812, 35.5458984375, 61.176612854003906, 57.717132568359375, 14.811649322509766, 15.447616577148438, 33.13995361328125, -11.960281372070312, 55.96923065185547, 17.30561065673828, 53.59895324707031, 25.305519104003906, 7.2340545654296875, 80.08160400390625, 90.697998046875, 16.60723114013672, 36.672454833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000443.npy"} +{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 34.26472473144531, "std": 28.675121307373047, "min": -25.827930450439453, "p10": 0.5260257720947279, "median": 31.261930465698242, "p90": 67.79990997314454, "max": 118.71453094482422, "pos_frac": 0.890625, "sample": [21.484085083007812, 2.243682861328125, 8.094169616699219, 63.18749237060547, 2.6780357360839844, 34.95236587524414, 29.55349349975586, 68.51702880859375, 32.970367431640625, -25.827930450439453, 61.48159408569336, 66.12663269042969, 58.482154846191406, 57.385963439941406, 103.24452209472656, 20.344345092773438, 50.294525146484375, 19.749420166015625, 24.29473114013672, 1.8739471435546875, 11.222959518432617, -6.531147003173828, 79.36463928222656, 118.71453094482422, -0.051654815673828125, 53.134849548339844, 46.192901611328125, -0.732330322265625, 19.581340789794922, 41.395538330078125, 41.50707244873047, 102.6728744506836, 56.11856460571289, 6.945009231567383, 11.30816650390625, 80.47266387939453, 23.536529541015625, 45.468910217285156, 62.65077209472656, 20.571205139160156, 13.605033874511719, 59.444915771484375, 51.35508728027344, -6.5710296630859375, 34.80853271484375, 38.960655212402344, 25.122482299804688, 28.315889358520508, 20.338211059570312, 36.73372268676758, 50.55476379394531, 8.063526153564453, 15.273748397827148, 26.84314727783203, 36.62244415283203, -1.07952880859375, 4.284936904907227, 39.365692138671875, 22.183229446411133, 28.145416259765625, 72.41719055175781, -5.756996154785156, 45.7642822265625, 33.472938537597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000444.npy"} +{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 31.9174747467041, "std": 27.97288703918457, "min": -17.563079833984375, "p10": 0.14051380157470733, "median": 24.574329376220703, "p90": 64.97780532836914, "max": 126.83767700195312, "pos_frac": 0.90625, "sample": [7.888973236083984, 0.011081695556640625, 27.771682739257812, 62.60906982421875, 19.7242431640625, 37.82054138183594, 6.115091323852539, -4.6718292236328125, 24.401039123535156, 50.039306640625, 14.566120147705078, 72.15343475341797, 3.0619354248046875, -3.4669933319091797, 34.45542907714844, -8.8365478515625, 44.45916748046875, 59.09496307373047, -4.716829299926758, 16.67413330078125, 13.151348114013672, 22.481826782226562, 60.132041931152344, 61.30879211425781, 2.027973175048828, 23.280982971191406, 64.194091796875, 65.81924438476562, 22.651992797851562, 62.13633728027344, 30.82619857788086, 53.9053955078125, 45.017173767089844, 65.31368255615234, 46.24146270751953, 3.9650115966796875, 47.114219665527344, 55.32975769042969, 0.4425220489501953, 68.84660339355469, -0.46526336669921875, 126.83767700195312, 67.31463623046875, 13.185256958007812, 17.76729965209961, 57.062896728515625, 8.827583312988281, 7.164176940917969, 22.925071716308594, 27.5429630279541, 30.324874877929688, 24.74761962890625, 3.685161590576172, 63.565101623535156, 62.3121337890625, 41.59803771972656, 8.489715576171875, 23.857439041137695, 98.05050659179688, 13.834381103515625, 18.960994720458984, 22.24213409423828, 27.1104736328125, -17.563079833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000445.npy"} +{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 30.172958374023438, "std": 26.881940841674805, "min": -13.298759460449219, "p10": 1.4689628601074218, "median": 29.502933502197266, "p90": 58.65085220336916, "max": 121.2496337890625, "pos_frac": 0.921875, "sample": [13.610366821289062, -13.298759460449219, 1.5146942138671875, 63.11927795410156, 89.54214477539062, 46.38033676147461, 30.920124053955078, 22.69522476196289, 10.985286712646484, 107.37059020996094, 121.2496337890625, 32.15069580078125, 41.94019317626953, 40.93504333496094, 8.26446533203125, 44.566898345947266, 52.789398193359375, 9.092567443847656, 16.892574310302734, 23.34864044189453, -0.1880035400390625, 1.4493637084960938, 34.27099609375, 8.303855895996094, 2.4298934936523438, 13.145675659179688, 38.94300079345703, 44.20298385620117, 18.512619018554688, 38.52779769897461, 29.451614379882812, 3.935699462890625, 44.92933654785156, 44.57685852050781, 0.38910865783691406, 35.63468933105469, -4.583160400390625, 60.66502380371094, 2.0042877197265625, 65.15460205078125, 50.21403503417969, 3.6501102447509766, 43.46399688720703, 29.55425262451172, 47.046234130859375, 14.660774230957031, 36.914398193359375, 42.91015625, 29.80280303955078, 53.745880126953125, 25.6131591796875, 14.7591552734375, 4.015533447265625, 17.73801040649414, -0.4235496520996094, 6.404502868652344, 12.11587905883789, 18.087478637695312, 53.95111846923828, 29.686296463012695, -3.5627059936523438, 93.66592407226562, 19.581451416015625, 41.64878845214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000446.npy"} +{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 31.73101806640625, "std": 28.9771671295166, "min": -10.571380615234375, "p10": -0.9110239028930646, "median": 27.255020141601562, "p90": 67.70272369384766, "max": 142.198974609375, "pos_frac": 0.890625, "sample": [23.254009246826172, 45.12932586669922, 13.962417602539062, 29.331663131713867, 1.7430267333984375, -2.8771915435791016, 54.176910400390625, 43.65802764892578, 23.279598236083984, -7.8604583740234375, 1.4079627990722656, 51.975982666015625, 36.2225341796875, 32.6088981628418, 62.270965576171875, 27.337493896484375, 87.05473327636719, -2.6533565521240234, 142.198974609375, 68.11370849609375, 13.79893684387207, 101.5321044921875, 48.98455047607422, 47.74756622314453, 14.816879272460938, 28.261497497558594, 33.00385665893555, 2.647014617919922, 12.561882019042969, 23.744239807128906, 4.1564788818359375, -6.859260559082031, 34.736175537109375, -10.571380615234375, 50.07593536376953, 10.233442306518555, 59.88250732421875, 27.17254638671875, 14.908279418945312, 1.52276611328125, 16.016921997070312, 0.9549827575683594, 39.24802780151367, -1.7107410430908203, 66.74375915527344, 16.303184509277344, 26.507675170898438, 16.136247634887695, 13.635009765625, 16.0809268951416, 24.559906005859375, 39.07069396972656, 71.97492980957031, 33.26301574707031, 14.019523620605469, 51.053985595703125, 59.551265716552734, 10.520149230957031, 74.5093765258789, 83.05807495117188, 30.220924377441406, 48.14190673828125, 43.238407135009766, -5.00433349609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000447.npy"} +{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 32.101654052734375, "std": 25.543386459350586, "min": -23.457015991210938, "p10": 5.658212471008301, "median": 24.609575271606445, "p90": 74.5552131652832, "max": 88.93195343017578, "pos_frac": 0.9375, "sample": [24.679527282714844, 17.100147247314453, 20.152095794677734, 49.39787292480469, 79.45246124267578, -9.25311279296875, 21.310279846191406, 17.69762420654297, 24.21918487548828, 26.230621337890625, 41.496986389160156, 14.856307983398438, 32.205047607421875, 13.656715393066406, 58.92243194580078, 46.22844696044922, 21.135732650756836, 44.479278564453125, 74.73652648925781, 21.981910705566406, -23.457015991210938, 25.032176971435547, 66.72417449951172, 22.172775268554688, 24.539623260498047, 5.749114990234375, 27.130474090576172, 16.51183319091797, 64.75958251953125, 78.71743774414062, 36.15985870361328, 57.71204376220703, 5.716535568237305, 74.13214874267578, 82.25837707519531, -2.892976760864258, 23.92291259765625, 13.881942749023438, 5.633216857910156, 29.163330078125, 17.960861206054688, 77.09286499023438, 16.901229858398438, 17.269882202148438, 88.93195343017578, 30.30401611328125, 10.8302001953125, 18.783044815063477, 13.898025512695312, 51.694190979003906, 36.34899139404297, -4.905487060546875, 69.07456970214844, 9.3963623046875, 44.09521484375, 63.96001434326172, 1.6326980590820312, 16.08202362060547, 25.74495506286621, 4.23748779296875, 50.418460845947266, 25.188888549804688, 19.399871826171875, 75.9119644165039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000448.npy"} +{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 31.415071487426758, "std": 30.40106201171875, "min": -49.0272216796875, "p10": -0.7997825622558578, "median": 30.896163940429688, "p90": 75.41165924072267, "max": 119.75822448730469, "pos_frac": 0.890625, "sample": [3.767852783203125, 20.682586669921875, 32.684906005859375, 5.5046844482421875, 76.88174438476562, 40.30268096923828, 16.067001342773438, 12.130308151245117, 82.03456115722656, 39.38831329345703, 66.97078704833984, 19.90231704711914, 1.4271316528320312, 0.8472137451171875, 30.94183349609375, 43.591983795166016, 37.385948181152344, 17.187339782714844, 24.92601776123047, 3.420360565185547, 62.085479736328125, 20.376686096191406, 49.82801055908203, 32.497802734375, 14.626167297363281, 86.26348876953125, 30.95447540283203, 7.376142501831055, 30.850494384765625, 39.753841400146484, 42.18503952026367, 3.0599441528320312, 15.026397705078125, 26.494369506835938, 41.495849609375, 95.50808715820312, -14.660728454589844, 19.826980590820312, -6.506500244140625, -6.80029296875, 119.75822448730469, 43.758567810058594, -13.078193664550781, 57.194190979003906, 35.205909729003906, 15.967864990234375, -7.9764404296875, -49.0272216796875, 29.030712127685547, 71.98146057128906, 44.30542755126953, 79.93643951416016, 36.142059326171875, 11.916738510131836, 10.625518798828125, 43.204612731933594, 55.96124267578125, 102.54569244384766, 42.157997131347656, 51.78765869140625, 18.688377380371094, -1.5056381225585938, 34.01060485839844, 11.68564224243164], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000449.npy"} +{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 34.53790283203125, "std": 26.31365394592285, "min": -4.369209289550781, "p10": 3.9715761184692404, "median": 29.440296173095703, "p90": 67.05525360107423, "max": 121.40519714355469, "pos_frac": 0.953125, "sample": [26.9737548828125, 21.34979248046875, 44.32029724121094, 45.84419250488281, 25.21075439453125, 97.64665222167969, 65.85295104980469, -3.1928558349609375, 11.81976318359375, 1.0396347045898438, 22.18413543701172, 81.55931091308594, 70.58348083496094, 12.300315856933594, 51.21868133544922, 50.4974365234375, 35.38923645019531, -1.4954833984375, 30.69256591796875, 52.531394958496094, 33.17259216308594, 14.64202880859375, 11.073890686035156, 58.32763671875, 33.18577575683594, 20.797382354736328, 51.97344970703125, 69.71493530273438, 25.522300720214844, 61.8212890625, 43.37770080566406, 67.57052612304688, 54.02172088623047, 17.9765625, 40.979026794433594, 2.9325428009033203, 16.435123443603516, 54.45494842529297, 28.188026428222656, 24.48107147216797, 86.95774841308594, 0.389923095703125, 14.173919677734375, 65.4326400756836, 7.106800079345703, 23.921409606933594, -4.369209289550781, 42.5980224609375, 45.13397979736328, 8.734527587890625, 16.209388732910156, 16.105148315429688, 41.13970184326172, 55.6148681640625, 22.932416915893555, 3.0087623596191406, 60.00093078613281, 36.818138122558594, 37.67424774169922, 12.715492248535156, 7.290428161621094, 121.40519714355469, 10.238632202148438, 6.218141555786133], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000450.npy"} +{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 29.465295791625977, "std": 26.04265785217285, "min": -38.6256103515625, "p10": 0.4771385192871095, "median": 24.735360145568848, "p90": 64.72141647338867, "max": 95.07046508789062, "pos_frac": 0.921875, "sample": [9.983291625976562, 48.83152770996094, 24.444915771484375, 63.65802764892578, 53.294654846191406, 18.891403198242188, 3.9172210693359375, 58.12037658691406, 8.957412719726562, 84.12345886230469, 40.765716552734375, 95.07046508789062, 45.1068115234375, 60.35862731933594, 20.029075622558594, 48.021217346191406, 6.833829879760742, -38.6256103515625, 27.653602600097656, 0.575164794921875, 24.145896911621094, 17.36126708984375, 57.89715576171875, 66.55612182617188, -9.262985229492188, 11.828536987304688, 13.119222640991211, 33.165706634521484, 17.78774642944336, 67.77043151855469, 29.665203094482422, 82.498046875, 51.93547821044922, 25.012542724609375, 52.82306671142578, 28.432769775390625, 33.40557861328125, 13.858993530273438, 33.38391876220703, -3.887664794921875, -19.1916446685791, 0.11912155151367188, 65.17715454101562, 18.121322631835938, 34.503211975097656, 24.45817756652832, 24.15968132019043, 42.75970458984375, 37.153175354003906, 11.641080856323242, 47.38233184814453, 0.43512725830078125, 0.9960746765136719, 24.37360382080078, 39.016441345214844, 33.158111572265625, 16.41509246826172, 3.3024215698242188, 23.87474250793457, 2.1970558166503906, 74.37548828125, -7.339141845703125, 22.03972625732422, 39.14253616333008], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000451.npy"} +{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 30.134782791137695, "std": 22.0466365814209, "min": -4.161996841430664, "p10": 6.748517227172852, "median": 28.01622772216797, "p90": 58.3771224975586, "max": 121.65464782714844, "pos_frac": 0.984375, "sample": [19.193878173828125, 30.163841247558594, 59.77197265625, 18.757186889648438, 17.05280303955078, 31.644962310791016, 30.22182273864746, 36.91741943359375, 32.09228515625, 11.604270935058594, 83.830810546875, 34.01471710205078, 9.323318481445312, 4.419153213500977, 59.51911926269531, 17.084609985351562, 31.889509201049805, 45.724342346191406, 37.134864807128906, 4.101774215698242, 43.297760009765625, 9.954566955566406, 74.671875, 25.678955078125, 17.825973510742188, 54.3214111328125, 21.321176528930664, -4.161996841430664, 64.3399658203125, 17.139686584472656, 5.4506072998046875, 50.887451171875, 10.956104278564453, 28.745391845703125, 55.71246337890625, 29.77353286743164, 17.56174087524414, 35.15658950805664, 5.026496887207031, 25.04772186279297, 6.849857330322266, 5.8048858642578125, 33.5859375, 37.297637939453125, 9.432991027832031, 34.042938232421875, 22.1962890625, 28.745941162109375, 121.65464782714844, 21.7476806640625, 35.45330047607422, 29.30978775024414, 41.39874267578125, 24.776397705078125, 20.278518676757812, 11.585563659667969, 15.416519165039062, 79.11802673339844, 6.705085754394531, 37.29829406738281, 27.287063598632812, 41.27345275878906, 9.016998291015625, 25.17949104309082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000452.npy"} +{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 29.085241317749023, "std": 25.585979461669922, "min": -17.163497924804688, "p10": 1.6516025543212907, "median": 22.94738006591797, "p90": 63.12253494262696, "max": 104.78207397460938, "pos_frac": 0.9375, "sample": [45.76201629638672, 61.28010559082031, 18.436935424804688, 9.926237106323242, 71.77098846435547, 6.7372589111328125, 4.882667541503906, 53.729827880859375, 13.025899887084961, 14.552841186523438, 42.22303771972656, 25.84673309326172, 37.40110397338867, 21.52789306640625, 53.92232894897461, 19.5067138671875, 35.83775329589844, 30.854406356811523, 6.127889633178711, 9.570724487304688, 28.960205078125, -8.40695571899414, 12.822883605957031, 32.62895965576172, 85.05792236328125, 17.669189453125, 28.112472534179688, 37.23273468017578, -11.960418701171875, 56.73724365234375, 14.487964630126953, 7.310089111328125, 54.75642395019531, 64.30835723876953, 63.912147521972656, 21.30789566040039, 0.7525787353515625, 8.98782730102539, 104.78207397460938, 60.35789489746094, 3.9046783447265625, -1.4800968170166016, 88.01313781738281, 8.621002197265625, 25.539581298828125, 3.1732635498046875, 24.366867065429688, 18.712135314941406, 32.976158142089844, 58.85124969482422, -17.163497924804688, 49.820526123046875, 0.9994621276855469, 33.25894546508789, 35.12945556640625, 12.533113479614258, 20.874313354492188, 13.352880477905273, 66.75457763671875, 59.27079772949219, 7.222164154052734, 33.953704833984375, 0.7475814819335938, 19.282691955566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000453.npy"} +{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 32.31932830810547, "std": 26.018522262573242, "min": -18.416839599609375, "p10": 0.6077869415283209, "median": 25.710498809814453, "p90": 73.8269874572754, "max": 100.41751098632812, "pos_frac": 0.921875, "sample": [1.1672897338867188, 23.782196044921875, 73.77877807617188, 46.50996398925781, 39.86847686767578, 24.374900817871094, 21.30584716796875, 26.062149047851562, 9.886470794677734, 32.082420349121094, -18.416839599609375, 13.815483093261719, 25.58344268798828, 82.5283203125, 22.26168441772461, 28.449050903320312, 16.847505569458008, 44.23614501953125, 0.3680000305175781, 25.837554931640625, 32.219573974609375, 58.72084045410156, 20.75299072265625, 74.6828384399414, 36.272682189941406, 38.47045135498047, 24.524185180664062, 6.719297409057617, 63.68861389160156, 48.652931213378906, 90.84257507324219, 6.818145751953125, 53.44004821777344, 24.103988647460938, 73.84764862060547, 21.589576721191406, 24.871604919433594, 17.581497192382812, 68.35011291503906, 35.77753448486328, 72.60481262207031, 30.095497131347656, 6.2510528564453125, 41.90676498413086, 23.007553100585938, 76.22222900390625, 21.655303955078125, 17.432899475097656, 44.918846130371094, 0.19397735595703125, 21.801742553710938, 100.41751098632812, -10.520973205566406, 46.425392150878906, 12.438112258911133, 36.49360656738281, -2.8759307861328125, -9.535568237304688, 29.381736755371094, 18.772750854492188, 36.377716064453125, -1.4940032958984375, 78.43634033203125, 15.77362060546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000454.npy"} +{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 37.20379638671875, "std": 26.521596908569336, "min": -26.825836181640625, "p10": 2.3324262619018556, "median": 37.252262115478516, "p90": 76.84151382446291, "max": 93.70462036132812, "pos_frac": 0.96875, "sample": [18.737071990966797, 27.565261840820312, 47.944236755371094, 80.86083984375, 8.166744232177734, 43.258270263671875, 55.37879943847656, 1.1786937713623047, 78.67581176757812, 2.192049026489258, 67.7850341796875, 33.013301849365234, 34.11946105957031, 16.5115966796875, 81.6773681640625, -26.825836181640625, 80.2081298828125, 91.47824096679688, 20.61573028564453, -9.356147766113281, 1.7310562133789062, 60.357383728027344, 18.959548950195312, 19.022127151489258, 72.56148529052734, 78.82752990722656, 44.37101364135742, 4.657585144042969, 15.828277587890625, 13.074386596679688, 48.6617546081543, 48.65846633911133, 21.804412841796875, 27.104263305664062, 17.548681259155273, 8.298248291015625, 22.659015655517578, 54.538902282714844, 2.65997314453125, 7.675376892089844, 44.82029342651367, 34.66143798828125, 66.99505615234375, 39.42748260498047, 0.3526458740234375, 28.456336975097656, 0.7443561553955078, 43.06805419921875, 45.84064483642578, 30.413436889648438, 62.596763610839844, 35.07704162597656, 59.44419860839844, 54.70497131347656, 40.99761199951172, 28.255847930908203, 17.499980926513672, 49.86378479003906, 58.47323989868164, 52.24974060058594, 93.70462036132812, 48.15660858154297, 47.0210075378418, 56.033935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000455.npy"} +{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 37.51462936401367, "std": 28.913846969604492, "min": -13.976806640625, "p10": 4.456723976135255, "median": 34.63663864135742, "p90": 81.35093231201172, "max": 101.62435913085938, "pos_frac": 0.9375, "sample": [14.190990447998047, -5.2823486328125, 80.31143188476562, 32.870269775390625, 15.349319458007812, 47.89302062988281, 19.98245620727539, -6.426185607910156, 33.01219940185547, 81.79643249511719, 39.936553955078125, 4.183294296264648, 12.477005004882812, 89.42861938476562, 28.318344116210938, 50.815834045410156, 38.47833251953125, 61.21478271484375, 5.712789535522461, 34.331268310546875, 6.096771240234375, 13.245922088623047, 5.0947265625, 73.97064208984375, 10.92184066772461, 60.763885498046875, 61.11482238769531, 56.48090362548828, 68.33953857421875, 47.79327392578125, 36.793678283691406, 85.12008666992188, 75.21076202392578, 23.97555923461914, -13.976806640625, -0.335693359375, 17.990266799926758, 45.387428283691406, 78.02250671386719, 0.8581085205078125, 44.336448669433594, 37.87107849121094, 23.086631774902344, 9.524137496948242, 27.429065704345703, 16.25042724609375, 9.498517990112305, 25.76153564453125, 101.62435913085938, 6.343544006347656, 94.92825317382812, 61.502166748046875, 48.86668395996094, 11.405403137207031, 22.648372650146484, 44.02361297607422, 55.056373596191406, 26.590164184570312, 38.741180419921875, 3.0373764038085938, 34.94200897216797, 83.49264526367188, 94.24044799804688, 48.273216247558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000456.npy"} +{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 33.051368713378906, "std": 24.597646713256836, "min": -25.799957275390625, "p10": 6.163788414001466, "median": 30.566932678222656, "p90": 62.195135498046874, "max": 102.76638793945312, "pos_frac": 0.9375, "sample": [28.021617889404297, 54.267242431640625, 63.92877197265625, 18.760337829589844, 26.345169067382812, 14.611665725708008, 45.76805114746094, 51.94842529296875, 39.772178649902344, 23.576385498046875, 102.76638793945312, 83.98298645019531, 14.190570831298828, -8.592571258544922, 52.959747314453125, 36.92144775390625, 40.791526794433594, 11.585437774658203, 56.98389434814453, 48.81513977050781, 17.314414978027344, 7.501056671142578, 17.41301727294922, 49.33818054199219, 17.923065185546875, 41.24567413330078, -3.619903564453125, 8.436016082763672, 81.10616302490234, 35.8990478515625, 42.835113525390625, 21.63775634765625, 8.194339752197266, 52.18004608154297, 44.74797821044922, 0.2057342529296875, 66.65180969238281, 31.92559051513672, 27.1773681640625, 36.671600341796875, 27.860702514648438, 28.986690521240234, 44.83995056152344, -10.267223358154297, 5.590673446655273, 7.913948059082031, 62.38628387451172, 32.13549041748047, 37.08586883544922, 61.749122619628906, 23.522315979003906, 51.412715911865234, 34.32111358642578, 94.20541381835938, -25.799957275390625, 25.640342712402344, 49.93170928955078, 14.477462768554688, 2.834423065185547, 24.399452209472656, 45.39238739013672, 24.03191375732422, 29.208274841308594, 11.239913940429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000457.npy"} +{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 38.03219223022461, "std": 32.63706970214844, "min": -21.762176513671875, "p10": -0.1276874542236316, "median": 39.85420227050781, "p90": 73.12674102783203, "max": 152.34742736816406, "pos_frac": 0.890625, "sample": [12.004779815673828, 2.8549118041992188, 52.30049133300781, 55.70151901245117, 26.578853607177734, -21.762176513671875, 139.55844116210938, -6.09046745300293, 58.96630859375, 53.862335205078125, 4.943807601928711, 28.78095245361328, 94.13729858398438, 29.510028839111328, 110.61538696289062, -16.82733917236328, 17.304386138916016, 1.1288032531738281, 50.801883697509766, 40.936004638671875, 46.910560607910156, 72.87847900390625, 38.608360290527344, 73.23313903808594, 46.885009765625, 43.28025817871094, 13.758316040039062, 44.427001953125, 34.18341064453125, 45.414207458496094, 44.017425537109375, 88.85408782958984, 43.207855224609375, 50.69416046142578, 40.4561767578125, -4.5373382568359375, 47.65773010253906, 14.604171752929688, 45.31629180908203, 76.21771240234375, 39.252227783203125, 30.772201538085938, -8.602119445800781, -0.6661834716796875, 34.22229766845703, 11.000076293945312, 43.226810455322266, 3.936676025390625, 27.073638916015625, 152.34742736816406, 18.67913818359375, 15.439868927001953, 28.363210678100586, 28.410852432250977, 15.386459350585938, 41.642730712890625, 55.94599151611328, 55.43564987182617, 22.922653198242188, 40.60783767700195, 38.06170654296875, 46.41373062133789, -10.262954711914062, 63.077110290527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000458.npy"} +{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 28.914405822753906, "std": 23.701152801513672, "min": -28.20240020751953, "p10": 1.7999582290649414, "median": 28.115949630737305, "p90": 61.58744812011719, "max": 83.88978576660156, "pos_frac": 0.96875, "sample": [11.94476318359375, 8.003776550292969, 3.3608474731445312, 44.31231689453125, 19.84991455078125, 32.5023078918457, 26.350799560546875, 8.621139526367188, 38.63005828857422, 34.205970764160156, 29.760910034179688, -2.5316619873046875, 48.55615234375, 34.120819091796875, 0.3209056854248047, 13.126296997070312, 51.965492248535156, 18.15026092529297, 1.3859939575195312, 83.88978576660156, 1.3308181762695312, 79.43084716796875, 30.333221435546875, 10.315620422363281, 14.919784545898438, 34.38861846923828, 50.575008392333984, 3.4180450439453125, 10.746112823486328, 34.11378479003906, 34.63698196411133, 40.60511016845703, 69.88606262207031, -28.20240020751953, 46.82234191894531, 1.8747997283935547, 69.72428894042969, 29.369457244873047, 1.76788330078125, 21.975067138671875, 26.862442016601562, 48.429237365722656, 20.801483154296875, 9.59967041015625, 20.524383544921875, 47.94171905517578, 4.039314270019531, 58.52937316894531, 15.078914642333984, 61.46074295043945, 62.23253631591797, 44.941261291503906, 7.786994934082031, 24.195510864257812, 11.152732849121094, 54.3887939453125, 5.201560974121094, 0.7782878875732422, 61.64175033569336, 76.14968872070312, 54.985191345214844, 30.031982421875, 3.5632266998291016, 35.6468505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000459.npy"} +{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 32.56512451171875, "std": 25.778074264526367, "min": -12.71712875366211, "p10": -1.0604377746582014, "median": 34.04936981201172, "p90": 64.51049270629883, "max": 109.62905883789062, "pos_frac": 0.890625, "sample": [39.79206848144531, 56.30812072753906, 31.513885498046875, 30.92587661743164, 22.825355529785156, 61.32693862915039, 62.35296630859375, 50.090240478515625, 11.452262878417969, 13.435474395751953, 16.80742645263672, 46.094383239746094, -12.71712875366211, 29.610992431640625, 3.580249786376953, 36.730743408203125, 19.657211303710938, 11.94549560546875, 1.8625030517578125, 70.75096130371094, 13.293010711669922, 34.556419372558594, 79.94682312011719, 9.580474853515625, 64.97972106933594, 36.65340805053711, 109.62905883789062, 42.56437683105469, 36.47569274902344, 32.941558837890625, 34.448760986328125, -2.1483688354492188, 58.134979248046875, 2.9430084228515625, 21.958663940429688, 89.51553344726562, -4.703874588012695, 40.226905822753906, 47.76408386230469, 52.818084716796875, 43.053199768066406, 33.83136749267578, 6.896888732910156, 40.9849739074707, 69.42545318603516, -4.93391227722168, 28.507354736328125, 48.61537170410156, 19.474098205566406, 39.40122985839844, 11.223350524902344, 0.45778656005859375, 7.733558654785156, -1.7111053466796875, -9.343658447265625, 42.51335906982422, 41.75773620605469, 19.118370056152344, -3.3088836669921875, 19.073997497558594, 63.415626525878906, 34.267372131347656, 51.61867904663086, 76.17129516601562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000460.npy"} +{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 27.81532859802246, "std": 28.51862144470215, "min": -17.881759643554688, "p10": -0.8643449783325196, "median": 24.851158142089844, "p90": 54.906156921386724, "max": 134.0843505859375, "pos_frac": 0.875, "sample": [27.682525634765625, 3.9995975494384766, 6.770637512207031, 11.829444885253906, 29.44314193725586, 43.8443603515625, 6.873870849609375, 55.159034729003906, 5.395263671875, 11.908309936523438, 25.009246826171875, -17.30498504638672, 33.41563415527344, 34.191368103027344, 134.0843505859375, 39.71587371826172, 76.08619689941406, -0.8391094207763672, -0.8751602172851562, 12.593246459960938, 13.1365966796875, 25.645402908325195, 54.31610870361328, 73.78727722167969, 39.934242248535156, 8.946136474609375, 50.51829528808594, 129.6348419189453, 25.440561294555664, 21.307098388671875, 49.4793701171875, 23.75395393371582, 51.407779693603516, 8.131935119628906, 39.9178466796875, -2.6045684814453125, 9.979705810546875, 4.01416015625, -17.881759643554688, 24.693069458007812, 69.55545043945312, -5.04887580871582, 35.18473815917969, 29.769981384277344, -6.232931137084961, 4.36981201171875, 25.20418930053711, 52.766204833984375, 13.050430297851562, 13.539337158203125, 23.247421264648438, 8.993600845336914, 21.989933013916016, 6.138702392578125, 19.74422836303711, -10.533905029296875, 4.75225830078125, 29.698537826538086, 42.771690368652344, 61.66862487792969, 38.67424011230469, 45.83118438720703, 48.68650817871094, 33.81871795654297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000461.npy"} +{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 26.86299705505371, "std": 25.792369842529297, "min": -22.348979949951172, "p10": -0.24614410400390557, "median": 20.27871322631836, "p90": 70.00483627319336, "max": 85.3882064819336, "pos_frac": 0.890625, "sample": [22.12017822265625, -0.5392227172851562, -5.077178955078125, 24.197525024414062, 77.29187774658203, 36.402069091796875, 19.577150344848633, 0.9583663940429688, 17.402328491210938, 11.845718383789062, 7.9623870849609375, 8.505889892578125, 19.56015396118164, -9.60603141784668, 35.299949645996094, 69.16268920898438, 70.28184509277344, 44.69921875, -10.10565185546875, 43.43589401245117, 0.43770599365234375, 85.3882064819336, 32.25101089477539, 2.7854156494140625, 25.563152313232422, 18.01123046875, 14.908378601074219, 37.5452880859375, 2.7500858306884766, 16.755001068115234, 83.4556655883789, 10.312793731689453, 14.50838851928711, 32.33351135253906, 20.795989990234375, 73.4481430053711, 9.198898315429688, -0.8009052276611328, 1.7895050048828125, 79.25907897949219, 12.459722518920898, 18.621444702148438, 36.20200729370117, 38.96106719970703, 37.45348358154297, 21.055030822753906, 47.696044921875, 74.4595947265625, 12.648170471191406, 45.398521423339844, -9.95330810546875, 19.761436462402344, 69.35848236083984, 60.80208206176758, 44.42725372314453, 9.620811462402344, 23.03777313232422, 24.0302791595459, 18.741470336914062, 58.133262634277344, 7.905364990234375, 22.46332550048828, -22.348979949951172, 4.225757598876953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000462.npy"} +{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 33.929237365722656, "std": 24.047834396362305, "min": -25.2232723236084, "p10": 3.096006774902345, "median": 33.921653747558594, "p90": 60.983084869384776, "max": 97.442138671875, "pos_frac": 0.921875, "sample": [2.587554931640625, 67.21176147460938, -25.2232723236084, -4.634788513183594, 49.370567321777344, 39.961883544921875, 13.771778106689453, 27.432899475097656, 15.98880386352539, 11.463348388671875, 33.207069396972656, 28.51348114013672, 43.81004333496094, 76.47747802734375, 24.98670196533203, 16.217164993286133, 23.844528198242188, 25.964235305786133, 41.252342224121094, 36.647945404052734, 46.451988220214844, 97.442138671875, 28.071495056152344, 39.68681335449219, 15.202018737792969, 82.5810546875, -2.9328956604003906, 18.103668212890625, 45.280792236328125, 54.674232482910156, 17.850006103515625, 34.897560119628906, -3.6511001586914062, 4.2823944091796875, 47.652099609375, -2.593597412109375, 18.473907470703125, 24.232406616210938, 43.21308135986328, 17.736164093017578, 34.08624267578125, 57.90570068359375, 50.238487243652344, 38.08296203613281, 54.383544921875, 44.59825134277344, 22.076255798339844, 19.85430145263672, 48.909095764160156, 33.75706481933594, 62.301963806152344, 82.59451293945312, 39.1777458190918, 32.43257141113281, 24.014320373535156, 2.4518280029296875, 49.602909088134766, 4.5832977294921875, 34.79118347167969, 57.74217987060547, 35.17169189453125, 53.91741180419922, 92.52934265136719, 20.762496948242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000463.npy"} +{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 34.593727111816406, "std": 29.905498504638672, "min": -33.46949005126953, "p10": 0.7120391845703142, "median": 29.09914207458496, "p90": 71.723583984375, "max": 127.978515625, "pos_frac": 0.890625, "sample": [-0.00213623046875, 44.269073486328125, 47.25923156738281, 3.0769271850585938, 61.56658172607422, 33.31689453125, 18.980438232421875, 15.116300582885742, 20.435516357421875, 42.732666015625, 18.3197021484375, 81.00225830078125, 12.57464599609375, 40.8863525390625, 55.98579406738281, 58.50590515136719, 25.826919555664062, 36.3248176574707, -2.908355712890625, 72.02810668945312, 30.919689178466797, 42.334693908691406, 42.33069610595703, 3.85040283203125, 60.38267517089844, -6.69598388671875, 43.48932647705078, 120.31324768066406, 3.3758773803710938, 22.446327209472656, 17.6651611328125, 77.29057312011719, 27.278594970703125, 8.903858184814453, -0.40301513671875, 42.659908294677734, 42.46409606933594, 18.17325210571289, -4.417449951171875, 22.882781982421875, 26.697914123535156, 23.36342430114746, 71.01303100585938, 11.334653854370117, 16.233463287353516, 54.66116714477539, -33.46949005126953, 35.503684997558594, 7.609764099121094, 53.223602294921875, 44.7186279296875, 88.47660827636719, 127.978515625, 63.76002502441406, 21.597213745117188, 62.14696502685547, 80.92543029785156, -2.4341506958007812, 55.07243347167969, 49.39668273925781, 26.896881103515625, 2.378448486328125, 17.309661865234375, 9.06158447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000464.npy"} +{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 37.622222900390625, "std": 25.430631637573242, "min": -10.123451232910156, "p10": 5.885792541503908, "median": 33.624732971191406, "p90": 71.749853515625, "max": 97.96784973144531, "pos_frac": 0.953125, "sample": [49.09599304199219, 73.2752685546875, 28.51665496826172, 21.247093200683594, 9.791635513305664, 39.29869079589844, 34.91258239746094, 33.143768310546875, 11.16524887084961, 31.375694274902344, 24.484268188476562, 51.42681884765625, 4.636348724365234, 10.210426330566406, 63.56787109375, 97.96784973144531, 29.22525978088379, 54.49702072143555, 34.818359375, 31.873184204101562, 12.783409118652344, 54.70854568481445, 92.74154663085938, 33.6077880859375, 85.5406494140625, 2.0242843627929688, 91.25798034667969, 18.55218505859375, 21.20966339111328, 27.0711669921875, 16.916501998901367, 71.797607421875, 40.50733184814453, 35.847755432128906, 28.8526668548584, 55.18634033203125, -10.123451232910156, 45.52445983886719, 26.424243927001953, 47.7342529296875, 7.227836608886719, 31.932693481445312, 33.119361877441406, -7.1622161865234375, 67.92793273925781, 5.310630798339844, 39.66534423828125, 48.86627960205078, 14.074825286865234, 40.0515251159668, 71.638427734375, 2.4873600006103516, 46.70941162109375, -4.482887268066406, 63.50152587890625, 88.08541870117188, 19.564777374267578, 35.46464538574219, 30.898513793945312, 51.50879669189453, 23.947891235351562, 61.4095458984375, 69.73804473876953, 33.64167785644531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000465.npy"} +{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 34.00209045410156, "std": 23.88724136352539, "min": -20.9278564453125, "p10": 5.35496826171875, "median": 33.67691993713379, "p90": 67.63508377075196, "max": 84.46217346191406, "pos_frac": 0.9375, "sample": [52.75814437866211, 24.87525177001953, 22.42383575439453, 16.585174560546875, 33.4176025390625, 26.976768493652344, 18.91462516784668, 57.33695983886719, 39.01976013183594, 10.982646942138672, 30.200103759765625, 53.536705017089844, -20.9278564453125, 57.612762451171875, 16.390888214111328, 48.172874450683594, 37.95844268798828, 13.388439178466797, 32.3870849609375, 24.19963836669922, 10.361526489257812, 57.18882751464844, 52.924232482910156, 67.20094299316406, 70.26954650878906, 30.760761260986328, 62.698307037353516, 73.41537475585938, 38.55647277832031, 40.58006286621094, 25.291900634765625, 33.93623733520508, 40.99681854248047, 84.46217346191406, 19.94689178466797, 63.67509460449219, 42.80661392211914, 45.617767333984375, 17.371658325195312, 22.594139099121094, 1.861989974975586, 3.783489227294922, 27.734878540039062, 75.41008758544922, 39.54823303222656, 67.8211441040039, 17.304637908935547, 11.515731811523438, 5.77178955078125, 34.862403869628906, 39.30790710449219, 22.395221710205078, 39.5546875, 26.234092712402344, 40.974395751953125, 78.49859619140625, -13.916389465332031, 16.72906494140625, 77.65171813964844, 5.17633056640625, 50.048927307128906, -7.586029052734375, 34.57239532470703, -15.986831665039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000466.npy"} +{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 34.46977996826172, "std": 28.059871673583984, "min": -13.036460876464844, "p10": 3.222737503051759, "median": 28.515260696411133, "p90": 75.22916107177736, "max": 126.71263122558594, "pos_frac": 0.921875, "sample": [51.38999938964844, 50.250518798828125, 126.71263122558594, 14.063018798828125, -7.001045227050781, -9.145172119140625, 22.71346664428711, 38.835235595703125, 35.616600036621094, 54.46742248535156, 76.83523559570312, 61.193206787109375, 36.49995422363281, -0.5970973968505859, 80.86062622070312, 51.478736877441406, 22.18663787841797, 5.969337463378906, 92.55773162841797, 39.044769287109375, 41.74391174316406, 7.163066864013672, 27.450706481933594, 80.99784851074219, 14.01995849609375, 17.319801330566406, 22.72820281982422, 14.104110717773438, 73.03697204589844, 35.232818603515625, 35.16838073730469, 2.762248992919922, 41.01275634765625, 14.819778442382812, 69.30880737304688, 20.927947998046875, 76.16867065429688, 22.31954574584961, 19.991775512695312, 4.297210693359375, 34.619384765625, 16.56775665283203, 34.30069351196289, 14.796348571777344, 39.50756072998047, 19.468734741210938, 9.292861938476562, -2.4877548217773438, 1.3106689453125, 60.796958923339844, 56.75653076171875, 64.75720977783203, 8.055496215820312, 29.579814910888672, 50.917724609375, 25.945030212402344, 49.600006103515625, 18.550872802734375, 22.756813049316406, 96.24759674072266, -13.036460876464844, 46.03938674926758, 13.912757873535156, 23.303466796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000467.npy"} +{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 32.051353454589844, "std": 26.79055404663086, "min": -10.897687911987305, "p10": 2.9351116180419936, "median": 27.947189331054688, "p90": 76.59086685180667, "max": 107.06901550292969, "pos_frac": 0.921875, "sample": [47.37403869628906, -10.897687911987305, 59.838043212890625, 17.051666259765625, 27.545867919921875, 34.871063232421875, 89.47651672363281, 34.07688903808594, 81.30435180664062, 62.154571533203125, 69.4819564819336, 7.5366363525390625, 31.662155151367188, 19.989356994628906, -6.967061996459961, 8.306640625, 30.109607696533203, 9.447893142700195, 35.48223876953125, 53.19123840332031, 26.301788330078125, 7.02165412902832, -4.260231018066406, 27.079872131347656, 32.587867736816406, 12.135986328125, 44.98766326904297, 89.53636169433594, 45.704345703125, 4.54925537109375, 0.2938671112060547, 11.103538513183594, 52.73918533325195, 27.56391143798828, 34.88214111328125, 16.048812866210938, 26.032798767089844, 31.80545425415039, 79.63754272460938, 28.330467224121094, 95.34387969970703, 22.648630142211914, 14.518020629882812, 22.132766723632812, 34.83854675292969, 22.3896484375, 46.015289306640625, 31.400314331054688, 11.061386108398438, 107.06901550292969, 33.82301330566406, 39.5799560546875, 2.243335723876953, -0.29131317138671875, 8.739151000976562, 39.70549774169922, 41.94384765625, 37.630035400390625, 89.51520538330078, -6.86749267578125, 27.402053833007812, 6.931606292724609, 15.511962890625, 12.884025573730469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000468.npy"} +{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 33.17765426635742, "std": 27.324230194091797, "min": -14.433998107910156, "p10": 0.01942462921142818, "median": 28.768460273742676, "p90": 67.58548736572266, "max": 126.06108856201172, "pos_frac": 0.890625, "sample": [50.43603515625, 34.44407653808594, 26.681119918823242, 36.59138488769531, 16.759002685546875, 4.065900802612305, 23.327930450439453, 21.781471252441406, 21.98193359375, -8.945960998535156, 59.154510498046875, 41.937347412109375, 29.39613151550293, -2.11541748046875, 68.87841796875, 32.985557556152344, 67.71148681640625, 41.359283447265625, 36.117164611816406, 58.8853759765625, 77.6664047241211, 19.467567443847656, 42.17597579956055, 14.218917846679688, 83.55841064453125, 3.153533935546875, 48.671974182128906, 67.29148864746094, 90.35675048828125, 2.714691162109375, 27.631763458251953, 25.223602294921875, 54.790077209472656, 2.365468978881836, 23.084075927734375, -8.909769058227539, 29.38611602783203, -4.0488128662109375, 7.732475280761719, 15.113357543945312, 67.20143127441406, -11.456741333007812, 54.128074645996094, 19.62313461303711, 27.426361083984375, 41.11274719238281, 28.15080451965332, 34.71693420410156, 60.11683654785156, 3.9286632537841797, 25.195571899414062, 69.70828247070312, -0.98602294921875, 126.06108856201172, 61.442291259765625, 65.65177154541016, -14.433998107910156, 40.23100662231445, 21.91338348388672, 23.385391235351562, 30.056350708007812, 14.062576293945312, 30.912673950195312, 22.144454956054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000469.npy"} +{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 38.34840393066406, "std": 24.011594772338867, "min": -5.076141357421875, "p10": 8.655946350097658, "median": 38.78763961791992, "p90": 66.63794708251953, "max": 102.77564239501953, "pos_frac": 0.953125, "sample": [24.62181854248047, 54.27960968017578, 22.305641174316406, 42.73301696777344, 53.88334655761719, 29.432518005371094, 38.95899200439453, 42.44816589355469, 17.753402709960938, 33.87928009033203, 54.48805236816406, 26.350852966308594, 38.63288116455078, 13.179054260253906, 49.864070892333984, 34.629974365234375, 12.06294059753418, 99.90216064453125, -5.076141357421875, 35.777565002441406, 44.549827575683594, 6.7599639892578125, 44.312522888183594, -3.7939834594726562, 43.6031494140625, 18.98322105407715, 26.067028045654297, 74.61746978759766, 35.86232376098633, 2.3440170288085938, 65.68313598632812, 28.508413314819336, 66.95478820800781, 28.36595916748047, 8.056272506713867, 34.8602294921875, 48.027400970458984, 54.449256896972656, 40.052276611328125, 19.40416717529297, 51.00994110107422, 39.287841796875, 65.89865112304688, 65.2201156616211, 22.31829833984375, 102.77564239501953, 56.10356903076172, 10.055185317993164, 69.29752349853516, 59.76116943359375, -2.597503662109375, 13.182540893554688, 28.708145141601562, 38.94239807128906, 44.90203857421875, 73.17764282226562, 13.549667358398438, 61.62617492675781, 43.701934814453125, 5.3107757568359375, 95.06727600097656, 40.38550567626953, 36.435707092285156, 12.402824401855469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000470.npy"} +{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 31.3731689453125, "std": 24.57742691040039, "min": -16.857240676879883, "p10": 1.8243820190429703, "median": 32.074214935302734, "p90": 61.725701904296876, "max": 99.42613220214844, "pos_frac": 0.90625, "sample": [55.52122497558594, -2.7848281860351562, 47.4036865234375, 50.24638366699219, 6.84259033203125, 40.8767204284668, 14.368362426757812, 72.24681091308594, 56.514007568359375, 3.490814208984375, 39.40876770019531, 6.28546142578125, 51.612144470214844, 6.8938446044921875, 89.12779235839844, 5.923858642578125, 11.241214752197266, -1.4592132568359375, 48.368186950683594, 67.089111328125, 99.42613220214844, 4.6474456787109375, 47.457984924316406, 50.81333541870117, 54.26960754394531, -16.857240676879883, -6.5189971923828125, 23.480443954467773, 37.67347717285156, 39.925514221191406, 10.289346694946289, 33.444488525390625, 14.107261657714844, 50.14411163330078, 27.53886604309082, 62.12461471557617, 61.87627410888672, 3.459442138671875, 34.3740234375, 61.11493682861328, 38.46162033081055, 15.200660705566406, 50.95641326904297, 22.279064178466797, 30.99957275390625, 33.37623596191406, 34.8389892578125, 1.1236419677734375, 23.176406860351562, 14.831787109375, -1.5977783203125, 26.907264709472656, -3.769674301147461, 53.537841796875, 18.218902587890625, 18.072509765625, 4.173982620239258, 17.472095489501953, 26.937301635742188, 66.75338745117188, 24.797149658203125, 61.374366760253906, 34.604156494140625, 33.14885711669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000471.npy"} +{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 40.2446174621582, "std": 33.216896057128906, "min": -9.222412109375, "p10": 5.102720642089844, "median": 33.46429443359375, "p90": 81.19024047851563, "max": 138.20571899414062, "pos_frac": 0.9375, "sample": [30.46471405029297, 5.710945129394531, 25.675155639648438, -5.607330322265625, 5.225799560546875, 45.859825134277344, 6.229877471923828, -9.222412109375, 41.72010803222656, -4.850933074951172, 29.734004974365234, 63.18733596801758, 36.445068359375, 10.026914596557617, 33.492095947265625, 121.41729736328125, 36.739845275878906, 12.335494995117188, 23.005828857421875, 33.436492919921875, 116.81573486328125, 18.65457534790039, 33.992340087890625, 37.175498962402344, 3.605794906616211, -3.7319259643554688, 30.691940307617188, 16.653121948242188, 78.44798278808594, 82.36549377441406, 14.976470947265625, 42.72749328613281, 48.61663818359375, 75.39169311523438, 134.95468139648438, 5.0499725341796875, 18.122440338134766, 97.58377075195312, 58.31597900390625, 34.26036834716797, 52.11842346191406, 67.18684387207031, 26.294063568115234, 64.11555480957031, 28.597396850585938, 138.20571899414062, 105.955078125, 37.8204345703125, 57.92366027832031, 33.387855529785156, 48.54193115234375, 17.04857635498047, 54.9114990234375, 8.936958312988281, 49.97947692871094, 54.13496398925781, 58.52496337890625, 25.01032257080078, 2.3975601196289062, 33.23497009277344, 25.485477447509766, 26.480152130126953, 23.945846557617188, 49.721534729003906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000472.npy"} +{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 28.15515899658203, "std": 22.63782501220703, "min": -19.25145721435547, "p10": -1.4527803421020495, "median": 29.29606819152832, "p90": 54.7385139465332, "max": 80.83113098144531, "pos_frac": 0.84375, "sample": [13.837730407714844, 22.076414108276367, 39.388946533203125, 59.755767822265625, 17.797527313232422, 32.128814697265625, 19.894668579101562, 16.41362762451172, 41.139801025390625, -0.36353111267089844, 37.8656005859375, 50.335609436035156, -1.9196014404296875, 10.354705810546875, 44.63457489013672, 17.531465530395508, 40.8004150390625, -0.1353759765625, 45.07389450073242, 40.55422592163086, -7.79088020324707, 66.8367919921875, 54.81071472167969, 30.333833694458008, 39.80708694458008, 21.126333236694336, 25.335113525390625, 73.71730041503906, 26.55409049987793, 35.55937957763672, 5.745258331298828, 43.10881805419922, 8.392341613769531, 18.041000366210938, -4.6004486083984375, 11.512775421142578, 80.83113098144531, 10.635108947753906, 42.01557922363281, 66.89398193359375, 22.237258911132812, 38.524253845214844, 43.614105224609375, 16.34130859375, 28.575851440429688, 33.274200439453125, -19.25145721435547, -7.545684814453125, -7.412506103515625, 54.570045471191406, 69.3982162475586, 49.3365478515625, 7.630247116088867, -3.1351242065429688, 30.016284942626953, 14.470836639404297, 41.43329620361328, -0.28057861328125, 38.71898651123047, 53.579063415527344, 4.7105560302734375, 46.419071197509766, 3.658151626586914, 47.016658782958984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000473.npy"} +{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 30.685691833496094, "std": 25.61254119873047, "min": -12.856475830078125, "p10": -3.3217891693115233, "median": 28.060593605041504, "p90": 65.52381286621093, "max": 92.20222473144531, "pos_frac": 0.8125, "sample": [22.133270263671875, 27.782304763793945, 12.345890045166016, 68.2520523071289, 91.73446655273438, 30.97478485107422, 86.99765014648438, -4.227571487426758, 50.89583969116211, 21.05217742919922, 23.03689193725586, 35.87664794921875, 5.724756240844727, 16.70209503173828, 10.882125854492188, 43.66058349609375, 17.3743953704834, -0.044116973876953125, -12.856475830078125, 31.001249313354492, 47.22059631347656, 64.70956420898438, -7.128196716308594, -7.316181182861328, -3.044178009033203, 55.697059631347656, 16.57623291015625, 21.305694580078125, 65.66337585449219, 18.874664306640625, -2.817607879638672, 20.116252899169922, 26.103755950927734, 38.85845184326172, 49.66072082519531, 47.52735900878906, 41.87123107910156, 28.973419189453125, 30.344594955444336, -3.440765380859375, 28.338882446289062, 46.495147705078125, 61.519500732421875, 52.43670654296875, -1.199371337890625, 23.818649291992188, 45.33930206298828, 92.20222473144531, 9.303852081298828, -6.030738830566406, 53.96150207519531, 24.798431396484375, 26.64836311340332, 15.541702270507812, 38.4017333984375, 28.401084899902344, 70.33526611328125, 23.98101043701172, 65.19816589355469, -4.09477424621582, 38.79808807373047, 32.45353698730469, -1.502349853515625, 69.6832275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000474.npy"} +{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 34.429412841796875, "std": 30.452960968017578, "min": -12.9737548828125, "p10": 3.837327575683595, "median": 29.204971313476562, "p90": 66.20656356811526, "max": 148.33450317382812, "pos_frac": 0.921875, "sample": [2.21826171875, 11.07187271118164, 46.086021423339844, 6.824989318847656, 33.959869384765625, 21.22238540649414, 53.79197692871094, 5.251182556152344, 148.33450317382812, 53.17350769042969, 27.87751007080078, 27.843521118164062, 57.4976806640625, 9.313064575195312, 55.32916259765625, 18.318227767944336, 122.64566802978516, 32.55418395996094, 43.016876220703125, -6.152317047119141, 26.039249420166016, -12.9737548828125, 43.12904357910156, 19.961442947387695, 16.773616790771484, 43.94123840332031, 68.94225311279297, 26.60291862487793, 34.182987213134766, 30.218170166015625, 12.739313125610352, 24.860809326171875, 3.3069076538085938, 58.907833099365234, 10.284622192382812, 75.34577178955078, 44.238975524902344, 35.194053649902344, 40.48322296142578, 59.82328796386719, 18.485857009887695, 5.449098587036133, 28.1917724609375, -1.6378021240234375, 34.40129852294922, 31.989418029785156, -5.78875732421875, 8.550552368164062, 73.10980987548828, 47.15973663330078, 5.074974060058594, 21.099830627441406, 52.75444793701172, 8.0267333984375, 43.485618591308594, 16.115768432617188, 27.26226806640625, 109.1036376953125, 41.395103454589844, 96.9839859008789, 7.412322998046875, 48.06248474121094, -2.4547481536865234, 57.06885528564453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000475.npy"} +{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 30.353801727294922, "std": 26.748226165771484, "min": -13.575729370117188, "p10": -2.375325775146484, "median": 28.592604637145996, "p90": 66.67832565307617, "max": 105.05415344238281, "pos_frac": 0.859375, "sample": [53.720611572265625, -1.1030635833740234, 0.45328330993652344, -10.538688659667969, 57.90668869018555, 34.26365661621094, 80.05931091308594, 51.002052307128906, 30.331157684326172, -9.773059844970703, 56.62287139892578, 40.448585510253906, 78.25645446777344, 50.49095153808594, 54.94822311401367, 31.098495483398438, 39.321434020996094, 17.052635192871094, 65.55218505859375, 60.623878479003906, 52.86180877685547, 35.68818664550781, -13.575729370117188, 24.183372497558594, 12.915559768676758, 29.262344360351562, -2.5973892211914062, 17.673110961914062, 3.3055496215820312, 11.479331970214844, 7.961658477783203, -1.857177734375, 4.602508544921875, 9.230537414550781, 47.87788391113281, 25.170780181884766, 21.986618041992188, 10.559457778930664, -4.952301025390625, 35.466094970703125, 5.897457122802734, 20.632797241210938, 45.682640075683594, -9.412017822265625, 3.3431148529052734, 12.500761032104492, 85.83251190185547, 67.57496643066406, 105.05415344238281, 58.930633544921875, 25.268234252929688, 17.71567153930664, 15.65875244140625, 31.25841522216797, -4.78790283203125, 27.92286491394043, 39.635765075683594, 67.16095733642578, 10.472957611083984, 32.43927001953125, 71.43084716796875, 41.79999542236328, 18.75921630859375, 43.89139175415039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000476.npy"} +{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 31.857627868652344, "std": 27.865074157714844, "min": -48.500282287597656, "p10": 0.07321891784668039, "median": 32.06553649902344, "p90": 63.10661010742188, "max": 104.81078338623047, "pos_frac": 0.890625, "sample": [21.59693145751953, 48.91667938232422, 18.41392707824707, 23.37976837158203, 78.74626922607422, 61.86102294921875, 63.235260009765625, 31.964805603027344, 28.24633026123047, 54.01570129394531, 33.3388671875, 20.45631980895996, 39.94328308105469, 62.806427001953125, 60.41334533691406, 15.896738052368164, 28.329071044921875, 21.590755462646484, 32.16626739501953, 43.13508605957031, 47.0128173828125, 34.09391784667969, 16.260025024414062, 5.133241653442383, -2.472412109375, 72.38121795654297, -7.870391845703125, 28.723724365234375, 17.1588077545166, 24.314035415649414, -5.700023651123047, 41.80082702636719, -48.500282287597656, -25.208526611328125, 47.563941955566406, 38.89404296875, 12.826889038085938, 38.991180419921875, 55.16204833984375, 22.420063018798828, 17.410140991210938, 0.7684364318847656, 57.493408203125, 66.37828826904297, 23.240325927734375, 34.80757141113281, 44.55652618408203, 51.839927673339844, 36.808258056640625, 9.918590545654297, 104.81078338623047, -23.90998077392578, 1.6171798706054688, 44.15220260620117, 2.0909500122070312, 68.6146240234375, 42.2445068359375, -0.2247314453125, 48.498199462890625, 17.221820831298828, 7.328971862792969, 28.457801818847656, 50.368202209472656, 102.958251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000477.npy"} +{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 34.497291564941406, "std": 29.46787452697754, "min": -18.171356201171875, "p10": 2.762488174438477, "median": 26.741891860961914, "p90": 72.82818222045898, "max": 119.847900390625, "pos_frac": 0.9375, "sample": [13.915102005004883, 32.162776947021484, -0.03363800048828125, 7.135768890380859, 22.27263641357422, 22.45574951171875, 41.54200744628906, 16.950908660888672, 7.689201354980469, 14.260848999023438, -18.171356201171875, 52.177467346191406, 5.110403060913086, 66.12785339355469, 3.14404296875, 53.85450744628906, 18.782424926757812, 100.52369689941406, 26.515472412109375, 33.51543426513672, 2.5989646911621094, -4.01806640625, 12.190433502197266, 47.023780822753906, 40.19054412841797, 102.31565856933594, 31.69927978515625, 12.725568771362305, 8.409294128417969, 40.23906707763672, 119.847900390625, 59.988712310791016, 22.26244354248047, 68.49169921875, 47.5831298828125, 60.513648986816406, 85.72451782226562, 12.177757263183594, 28.87451171875, 14.963920593261719, 57.95960998535156, 58.38202667236328, 2.1740875244140625, -4.81158447265625, 64.92035675048828, 21.682388305664062, 43.489776611328125, 71.980712890625, 13.457000732421875, 64.07417297363281, 73.1913833618164, 34.02154541015625, 26.968311309814453, 41.77751159667969, 0.1587066650390625, 23.286643981933594, 77.58773803710938, 6.83929443359375, 88.56672668457031, 13.290885925292969, 4.666160583496094, 23.29410171508789, 46.03325653076172, 21.101844787597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000478.npy"} +{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 30.368532180786133, "std": 22.986536026000977, "min": -12.185871124267578, "p10": 0.2989475250244156, "median": 31.502525329589844, "p90": 62.25023574829102, "max": 76.89051818847656, "pos_frac": 0.890625, "sample": [6.301445007324219, 38.14228439331055, 44.02178192138672, 8.15899658203125, -5.3148956298828125, 48.90545654296875, 23.625167846679688, 6.037101745605469, 50.346710205078125, -4.122434616088867, 52.474021911621094, 31.70458984375, 18.899124145507812, 39.98658752441406, 64.71135711669922, 30.134109497070312, 59.047569274902344, 10.236862182617188, -8.866920471191406, -0.3267707824707031, -12.185871124267578, 76.12942504882812, 8.564802169799805, 14.956771850585938, 52.027740478515625, 41.91765594482422, 20.80048370361328, 62.30511474609375, 50.48793411254883, -7.480457305908203, 62.12218475341797, 48.54670715332031, 52.085853576660156, 43.97564697265625, 3.4957199096679688, 30.616058349609375, 23.61236572265625, 9.25360107421875, 3.291290283203125, 43.44943618774414, 39.423274993896484, 33.48612976074219, 67.1737060546875, 24.185684204101562, 5.480072021484375, -6.2972412109375, 33.781585693359375, 41.271881103515625, 41.34637451171875, 64.83368682861328, 31.300460815429688, 25.146766662597656, 1.7589569091796875, 16.187698364257812, 22.976242065429688, 34.138275146484375, 39.18658447265625, 51.103485107421875, 31.91118049621582, 76.89051818847656, 26.657424926757812, 67.24099731445312, 28.6669921875, 3.6606979370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000479.npy"} +{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 36.273193359375, "std": 33.05205535888672, "min": -35.53260803222656, "p10": -1.4272773742675757, "median": 31.12323760986328, "p90": 79.5316505432129, "max": 127.28507995605469, "pos_frac": 0.890625, "sample": [17.473148345947266, 47.32502746582031, -2.3918609619140625, 32.10009765625, 8.977508544921875, 71.97647857666016, 53.796024322509766, 34.330711364746094, 28.216514587402344, 31.009994506835938, 16.787158966064453, 118.94855499267578, -11.999626159667969, 24.2127685546875, 54.58977127075195, 22.041534423828125, -8.073184967041016, 18.17218017578125, 58.32537841796875, 96.51387023925781, 29.798538208007812, -10.660774230957031, 8.099319458007812, 46.1873779296875, 16.26721954345703, 46.6033935546875, 36.25898742675781, 5.743417739868164, 36.27593231201172, 9.685302734375, 56.708831787109375, 84.98973846435547, 76.59484100341797, 24.68228530883789, 5.3401336669921875, 16.530685424804688, 32.679603576660156, 0.8234176635742188, 41.383888244628906, 29.137041091918945, -4.7586212158203125, -35.53260803222656, 59.79834747314453, 25.332130432128906, 127.28507995605469, 30.310728073120117, 68.55332946777344, 63.533355712890625, 17.048492431640625, 13.379579544067383, 1.8767547607421875, -6.8616180419921875, 36.89421081542969, 31.452842712402344, 82.25228881835938, 39.90597152709961, 20.996353149414062, 59.7309684753418, 31.236480712890625, 71.04869079589844, 80.790283203125, 15.093267440795898, 64.38583374023438, 122.2708511352539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000480.npy"} +{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 34.74099349975586, "std": 27.138439178466797, "min": -22.65024185180664, "p10": 0.9321571350097688, "median": 33.81380271911621, "p90": 69.24416122436524, "max": 95.69537353515625, "pos_frac": 0.890625, "sample": [32.37665557861328, -0.4157257080078125, 82.04096984863281, 34.67426681518555, 41.19792938232422, 13.483482360839844, 11.660354614257812, 53.31161880493164, 44.24068069458008, 61.02571105957031, 23.464881896972656, 14.696525573730469, 6.8672943115234375, -3.016185760498047, 32.953338623046875, 25.236404418945312, 23.51017951965332, 49.64567565917969, 61.653228759765625, 22.4803466796875, 83.31402587890625, 80.76445007324219, 66.21343231201172, 52.1540641784668, 35.439109802246094, 31.61548614501953, 40.127593994140625, 23.946739196777344, 12.397087097167969, -7.319938659667969, -19.421688079833984, 59.52998352050781, 20.054903030395508, -4.118583679199219, 67.9802017211914, 9.7392578125, 12.056900024414062, 59.728153228759766, 38.83361053466797, 44.7501220703125, 31.10035514831543, 34.70330810546875, 38.93387222290039, 94.40773010253906, 4.077217102050781, 4.223228454589844, 40.477874755859375, 52.71662139892578, 95.69537353515625, 44.58734130859375, 24.959564208984375, 15.862991333007812, -8.720909118652344, 66.30741119384766, 20.1981201171875, 41.89874267578125, -22.65024185180664, 31.83863067626953, 37.5841064453125, 69.78585815429688, 83.5938720703125, 23.85938835144043, 47.90293884277344, 11.207660675048828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000481.npy"} +{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 30.073631286621094, "std": 26.09050178527832, "min": -11.088953018188477, "p10": 2.3537872314453128, "median": 24.41065216064453, "p90": 65.52386322021485, "max": 107.620361328125, "pos_frac": 0.90625, "sample": [15.36175537109375, 9.494415283203125, 36.02703857421875, 25.5123291015625, 66.5223388671875, 72.18896484375, 27.40503692626953, 85.9694595336914, 9.639999389648438, 2.3145065307617188, 15.391044616699219, 16.86286163330078, 17.143478393554688, 3.419219970703125, 76.9925765991211, 62.817840576171875, 64.98641967773438, 26.97258758544922, 37.220703125, 12.543956756591797, 5.071987152099609, -6.066558837890625, 27.994598388671875, 59.83155059814453, 7.886631011962891, 20.051483154296875, -2.2414321899414062, 51.92340850830078, 75.33283996582031, 10.365455627441406, -0.013336181640625, 65.75419616699219, 47.264434814453125, 31.60839080810547, 11.497909545898438, 13.314592361450195, 2.4454421997070312, 32.65171813964844, 62.84405517578125, 25.257904052734375, 46.32110595703125, 7.632030487060547, -11.088953018188477, -0.8344802856445312, 30.88249397277832, 17.146514892578125, -2.541259765625, 63.94115447998047, 107.620361328125, 11.592376708984375, 9.456428527832031, 23.563400268554688, 6.9438323974609375, 31.487457275390625, 38.213470458984375, 46.55923080444336, 55.572208404541016, 32.46534729003906, 21.05914306640625, 8.377250671386719, 18.542083740234375, 13.146453857421875, 60.21405029296875, 60.878883361816406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000482.npy"} +{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 35.49970245361328, "std": 27.894441604614258, "min": -18.48330307006836, "p10": 2.74029655456543, "median": 33.16633605957031, "p90": 61.394462585449226, "max": 136.12245178222656, "pos_frac": 0.921875, "sample": [-11.142059326171875, 62.01177978515625, 24.246932983398438, 56.18476486206055, 17.733749389648438, 36.38838195800781, 30.2337646484375, 29.69472885131836, 58.058685302734375, 45.318870544433594, 70.85966491699219, 77.672119140625, 56.320396423339844, -5.863739013671875, 33.44416809082031, 32.33169174194336, 85.12586975097656, 32.88850402832031, 3.8947677612304688, 30.24933624267578, 10.403959274291992, 11.766824722290039, 53.072967529296875, 83.01048278808594, 2.6886367797851562, 56.34711456298828, 39.45137023925781, 18.61197280883789, 16.682342529296875, 59.78599548339844, 2.8608360290527344, 40.88732147216797, 1.6775131225585938, 107.01302337646484, 35.34974670410156, 27.845199584960938, 52.32750701904297, 47.35649871826172, 29.093505859375, 58.48710632324219, 46.03984069824219, 37.807373046875, 39.92412567138672, 9.457145690917969, 32.65888595581055, 22.14128875732422, 12.032432556152344, 21.977615356445312, 59.95405578613281, -2.7109222412109375, 38.264122009277344, -14.878021240234375, 11.379173278808594, 136.12245178222656, 23.886550903320312, 52.76123046875, 51.80225372314453, 38.05401611328125, 41.8568115234375, 7.317405700683594, 30.210556030273438, 48.16046905517578, 25.873130798339844, -18.48330307006836], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000483.npy"} +{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 35.594505310058594, "std": 30.20879554748535, "min": -5.215751647949219, "p10": 2.1252201080322273, "median": 29.749731063842773, "p90": 75.06423950195314, "max": 130.16082763671875, "pos_frac": 0.9375, "sample": [39.96672821044922, 17.46599578857422, 6.155555725097656, 65.48622131347656, 1.8198928833007812, 22.769271850585938, 12.899307250976562, 65.25506591796875, 76.40951538085938, 37.096927642822266, 7.063449859619141, 31.511322021484375, 34.89894104003906, 60.98982238769531, 48.0887336730957, 41.17420196533203, 84.61223602294922, 2.8376502990722656, 25.27886199951172, 15.60845947265625, 39.6492919921875, 28.964794158935547, 51.33393096923828, 67.56640625, 120.874267578125, -4.45793342590332, 27.287628173828125, 92.49838256835938, -5.215751647949219, 10.390228271484375, 0.9523048400878906, 15.700000762939453, 70.47732543945312, 64.7676010131836, 21.28339385986328, 12.069091796875, 25.627197265625, 37.19752502441406, 11.828788757324219, 38.7044677734375, 7.006324768066406, 18.875343322753906, 16.929290771484375, 52.560184478759766, 71.92526245117188, -3.418731689453125, 40.41455078125, 39.68536376953125, 6.570528030395508, 0.32254791259765625, 3.833494186401367, 15.874565124511719, 84.97664642333984, 47.61309051513672, 14.969459533691406, 30.53466796875, 130.16082763671875, 54.406009674072266, 84.22400665283203, -1.8223514556884766, 51.396324157714844, 27.37561798095703, 9.614336013793945, 49.13404083251953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000484.npy"} +{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 40.27811813354492, "std": 32.17557907104492, "min": -18.691848754882812, "p10": 0.78987216949463, "median": 37.30965232849121, "p90": 86.26026077270508, "max": 107.37789916992188, "pos_frac": 0.921875, "sample": [68.5755615234375, 77.60836791992188, 86.75193786621094, 5.9626617431640625, 85.1130142211914, 1.9172744750976562, 51.20189666748047, -3.89910888671875, 24.969009399414062, 59.648529052734375, 30.814247131347656, 10.719734191894531, 72.42765045166016, 35.866722106933594, 19.00025177001953, 107.37789916992188, 4.03594970703125, 0.027914047241210938, 92.67849731445312, 65.1632080078125, 50.51478576660156, 53.265960693359375, 12.463336944580078, 44.67119598388672, 42.417266845703125, 100.66156005859375, -6.569725036621094, 0.3066997528076172, 34.04298400878906, -2.9770278930664062, 15.072433471679688, 40.09159851074219, 28.39708709716797, 7.18145751953125, 60.384246826171875, 13.760711669921875, 69.65707397460938, 64.09194946289062, 20.986560821533203, 56.374786376953125, 39.54679870605469, 41.0137939453125, 53.312713623046875, 29.980209350585938, 6.217311859130859, 54.13334655761719, 4.529695510864258, 57.08405303955078, 27.24190902709961, -16.898834228515625, 102.70472717285156, 81.01396942138672, 26.32004165649414, 36.93724060058594, 34.18464660644531, 21.721939086914062, 101.84783935546875, 5.8841400146484375, -18.691848754882812, 24.6068115234375, 37.682064056396484, 83.95423889160156, 87.43780517578125, 55.27873229980469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000485.npy"} +{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 38.945411682128906, "std": 30.784053802490234, "min": -17.51525115966797, "p10": 0.9036026000976562, "median": 36.53977584838867, "p90": 79.24257583618164, "max": 112.56798553466797, "pos_frac": 0.90625, "sample": [77.10315704345703, 31.13446807861328, 38.37718200683594, 21.01834487915039, 65.48289489746094, 60.61505126953125, 66.5750503540039, 0.893646240234375, 41.64439392089844, 12.672294616699219, 45.44782257080078, 64.31318664550781, 70.21809387207031, 20.316871643066406, 13.545646667480469, 65.44351196289062, 29.432464599609375, 10.982109069824219, 10.602195739746094, 80.15946960449219, 98.58142852783203, 22.771507263183594, 13.595748901367188, -2.2135391235351562, -6.915214538574219, 35.04602813720703, 58.242698669433594, 58.483619689941406, 32.397705078125, 65.55164337158203, 58.582855224609375, 13.646247863769531, -13.016433715820312, 0.9268341064453125, 49.08944320678711, 57.88349151611328, 18.38837432861328, 89.6889419555664, 58.26676940917969, 67.38392639160156, 10.992523193359375, 23.089279174804688, 38.03352355957031, -2.1341476440429688, 58.881980895996094, 84.96273803710938, 80.21368408203125, 46.31382751464844, 57.85514450073242, 99.49569702148438, 3.3491897583007812, 31.145475387573242, 14.66465950012207, 30.056724548339844, 29.1683349609375, 22.49773406982422, 38.41089630126953, 68.71009063720703, -10.655097961425781, 3.0277252197265625, 62.850440979003906, -17.51525115966797, 4.163230895996094, 112.56798553466797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000486.npy"} +{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 37.19938278198242, "std": 26.885433197021484, "min": -6.788246154785156, "p10": 5.548789978027345, "median": 34.09355545043945, "p90": 73.12206039428713, "max": 97.73538208007812, "pos_frac": 0.9375, "sample": [53.73793029785156, 75.64063262939453, 20.22425079345703, 35.08793640136719, 84.48111724853516, 44.98322296142578, 48.331939697265625, 17.869384765625, 45.59877014160156, 6.641349792480469, 10.358551025390625, 33.09917449951172, 67.23004150390625, 0.363311767578125, 16.59589385986328, 59.85218811035156, 49.72138977050781, 13.788623809814453, 41.8963508605957, 55.4552001953125, 54.736785888671875, 62.118621826171875, 16.220489501953125, 86.6077880859375, 22.983402252197266, 50.868019104003906, 27.73199462890625, 20.468238830566406, 52.180763244628906, 12.233621597290039, 49.18511199951172, -6.788246154785156, 17.819808959960938, 21.234390258789062, 56.79441833496094, 9.734455108642578, 60.82017517089844, -6.740081787109375, 56.85634994506836, 56.91913986206055, 35.791358947753906, 66.68594360351562, 6.741708755493164, 5.117612838745117, 6.1442108154296875, 78.09293365478516, -2.0332260131835938, 12.395614624023438, 85.34366607666016, 18.571815490722656, 49.911041259765625, 31.36785888671875, 63.47726058959961, 54.15704345703125, 19.68384552001953, 5.293609619140625, 28.671417236328125, 97.73538208007812, 67.24539184570312, 28.405349731445312, -4.66046142578125, 88.9245834350586, 7.7838592529296875, 26.970169067382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000487.npy"} +{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 32.08653259277344, "std": 21.3510799407959, "min": -4.063652038574219, "p10": 10.163520812988281, "median": 30.793113708496094, "p90": 61.48150100708008, "max": 77.51725769042969, "pos_frac": 0.953125, "sample": [-1.6644363403320312, 35.659481048583984, 52.85075378417969, 17.843217849731445, 77.51725769042969, -4.063652038574219, 47.548583984375, 10.440073013305664, 16.766433715820312, 14.956008911132812, 33.47571563720703, 31.173904418945312, 38.917091369628906, 20.217483520507812, 70.53649139404297, 61.521484375, 33.243560791015625, 13.036359786987305, 10.218475341796875, 41.830841064453125, 14.973867416381836, 10.139968872070312, 54.22547912597656, 23.78946876525879, 30.412322998046875, 41.2877197265625, 24.558147430419922, 35.349246978759766, 52.560546875, 6.521360397338867, 32.7537841796875, 50.49077606201172, 11.783134460449219, 17.279212951660156, 73.40199279785156, 35.4920539855957, 61.388206481933594, 75.39036560058594, 44.700653076171875, 19.013099670410156, 51.01256561279297, 76.01560974121094, 17.064769744873047, 26.44683074951172, 25.099702835083008, -2.4857025146484375, 15.215919494628906, 38.946929931640625, 11.668403625488281, 13.836383819580078, 20.938493728637695, 35.03572082519531, 61.1550407409668, 20.562231063842773, 26.52552032470703, 33.35286331176758, 2.4777488708496094, 48.166221618652344, 11.091348648071289, 3.426746368408203, 15.997135162353516, 51.50469970703125, 36.988555908203125, 75.95783996582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000488.npy"} +{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 31.778711318969727, "std": 27.663244247436523, "min": -8.21101188659668, "p10": -1.713649940490722, "median": 27.261679649353027, "p90": 64.39057121276856, "max": 118.59872436523438, "pos_frac": 0.875, "sample": [30.983566284179688, 8.825843811035156, 34.556968688964844, 25.339996337890625, -8.21101188659668, 43.452728271484375, -2.4091243743896484, 59.70030975341797, 16.266372680664062, 21.915081024169922, 19.960697174072266, 6.370464324951172, 31.212692260742188, 19.940696716308594, 48.9031982421875, 19.88067626953125, 16.689327239990234, 70.82490539550781, 17.417198181152344, 62.85075759887695, 51.27525329589844, -1.0301189422607422, 16.404327392578125, 46.14777374267578, 21.048492431640625, -6.015228271484375, 20.64153289794922, -5.484767913818359, 19.491188049316406, 1.5882644653320312, 18.22028350830078, 29.635726928710938, 3.517261505126953, 26.334672927856445, 106.03733825683594, 40.68896484375, 36.740081787109375, 6.462982177734375, 41.062828063964844, 62.64940643310547, 54.64634704589844, 2.226747512817383, 88.49114990234375, 1.3670825958251953, 59.55168914794922, 65.05049133300781, 20.069908142089844, 28.295631408691406, 24.862102508544922, -6.3489990234375, 76.99053192138672, 62.347137451171875, 36.53340148925781, 73.6697769165039, 28.349939346313477, 28.18868637084961, 118.59872436523438, 38.424591064453125, 59.27879333496094, -6.990348815917969, 9.511199951171875, 44.72801971435547, 48.1138916015625, -2.006591796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000489.npy"} +{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 31.28500747680664, "std": 29.745311737060547, "min": -30.953392028808594, "p10": -2.26859951019287, "median": 25.51198673248291, "p90": 72.8109733581543, "max": 102.01776123046875, "pos_frac": 0.859375, "sample": [21.032989501953125, 3.6808547973632812, 38.25250244140625, -1.277475357055664, 29.612579345703125, 4.943794250488281, 39.92274475097656, -14.24831771850586, 62.63996124267578, -2.804502487182617, -9.321037292480469, -7.271881103515625, 22.315582275390625, 15.739664077758789, 41.812644958496094, 10.94921875, -30.953392028808594, 64.84564971923828, 11.795440673828125, 1.5639266967773438, -0.6351261138916016, 26.35711669921875, 11.816112518310547, 8.776704788208008, 8.576669692993164, 73.21892547607422, 32.48124694824219, 25.895437240600586, -2.6933670043945312, 57.10161590576172, 25.128536224365234, 24.222015380859375, 37.03215026855469, 71.18305969238281, 39.053123474121094, 17.355857849121094, 71.85908508300781, 43.79176330566406, 102.01776123046875, 7.727970123291016, 56.54051208496094, 86.44902801513672, 68.57919311523438, 74.01544189453125, 37.76507568359375, 92.84451293945312, -6.6092071533203125, 44.59699249267578, 8.776702880859375, 36.39588928222656, 65.06236267089844, 14.181333541870117, 52.38250732421875, 8.63037109375, 55.758819580078125, 51.89055633544922, 8.176483154296875, 5.309783935546875, 19.500259399414062, 87.60301971435547, 18.182186126708984, 84.63540649414062, 16.956283569335938, 31.11933708190918], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000490.npy"} +{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 28.079666137695312, "std": 27.855226516723633, "min": -31.170894622802734, "p10": -0.37073593139648364, "median": 24.21238899230957, "p90": 62.17767715454103, "max": 117.2748794555664, "pos_frac": 0.890625, "sample": [57.86668395996094, -7.190908432006836, 90.73269653320312, 34.87300109863281, 53.34748840332031, 18.18707847595215, 11.911575317382812, 1.7764892578125, 24.641468048095703, 21.212295532226562, 16.990703582763672, 25.7476806640625, 13.918067932128906, 26.214454650878906, 43.200714111328125, 25.665332794189453, 7.534210205078125, 18.959091186523438, 11.829010009765625, 38.39915466308594, 31.442031860351562, -2.4869213104248047, 58.291168212890625, 3.5089492797851562, 15.127164840698242, 18.169355392456055, 58.799842834472656, -5.547262191772461, 41.98051452636719, 9.570261001586914, 32.65257263183594, 68.7325439453125, -17.630348205566406, 6.48988151550293, 37.03141784667969, 52.70957946777344, 0.37268829345703125, 89.9627914428711, 51.288604736328125, 33.14686584472656, 29.5191650390625, 4.624509811401367, 117.2748794555664, 7.653772354125977, 3.221588134765625, 41.12373733520508, -0.6893463134765625, 48.702232360839844, 2.8724517822265625, 54.27943801879883, 12.82632827758789, 2.3216552734375, 63.62532043457031, 25.06293487548828, 19.05852699279785, 28.296035766601562, 82.34024810791016, 19.692337036132812, 23.783309936523438, 22.277023315429688, 79.1263656616211, -6.94439697265625, 28.793380737304688, -31.170894622802734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000491.npy"} +{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 26.099708557128906, "std": 30.116130828857422, "min": -31.390335083007812, "p10": -14.381443977355955, "median": 26.454147338867188, "p90": 70.02894439697266, "max": 90.36957550048828, "pos_frac": 0.796875, "sample": [11.327232360839844, 30.789710998535156, 22.696083068847656, -9.977178573608398, 33.514404296875, -17.684160232543945, 27.033905029296875, 32.58496856689453, 21.196949005126953, 26.299457550048828, 15.365669250488281, 20.54408073425293, -25.78583526611328, 41.373199462890625, -31.390335083007812, 90.36957550048828, -5.482536315917969, 54.25146484375, 12.196737289428711, 89.05291748046875, -10.562471389770508, 25.791685104370117, 39.521820068359375, 20.94390869140625, 74.60194396972656, 62.011783599853516, 57.433692932128906, 28.316375732421875, 30.761791229248047, 21.644996643066406, -5.999900817871094, 0.7513484954833984, 6.051425933837891, -23.80632781982422, -12.503450393676758, 67.6597900390625, 71.04429626464844, 8.430316925048828, -25.366836547851562, 44.721473693847656, 64.38369750976562, 7.18743896484375, 46.025306701660156, 13.457212448120117, -4.298763275146484, 73.2214126586914, 38.760929107666016, 21.034015655517578, 54.712432861328125, 26.608837127685547, 28.368377685546875, 40.30467987060547, 31.07353973388672, -15.186298370361328, 29.31005859375, 41.149417877197266, 71.34100341796875, 13.367130279541016, 3.3482398986816406, 81.76187133789062, -27.895078659057617, 51.47879409790039, 14.11231803894043, 47.03082275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000492.npy"} +{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 32.915733337402344, "std": 26.52034568786621, "min": -27.147605895996094, "p10": -2.360428619384765, "median": 30.162936210632324, "p90": 63.110100555419926, "max": 96.30465698242188, "pos_frac": 0.875, "sample": [44.2149772644043, 17.56307601928711, 96.30465698242188, 42.61035919189453, 4.7509918212890625, 45.197906494140625, 44.33859634399414, 32.0438232421875, -8.24456787109375, -27.147605895996094, 87.96514129638672, 20.582778930664062, 48.856353759765625, 17.763290405273438, 29.055038452148438, 47.160247802734375, 13.373664855957031, 34.1185302734375, 35.51937484741211, 9.5059814453125, 28.027423858642578, 58.955501556396484, 88.4402084350586, 72.53531646728516, 60.420413970947266, 35.761573791503906, 46.86212158203125, -1.7423477172851562, 12.298318862915039, 27.88751983642578, -9.913299560546875, 30.76230812072754, 62.47583770751953, 24.094085693359375, 75.97547149658203, 27.490327835083008, 24.007869720458984, 63.381927490234375, 26.556549072265625, -3.300628662109375, 19.102088928222656, 35.450592041015625, 29.56356430053711, 20.653099060058594, 58.98609161376953, 92.16537475585938, 11.20954704284668, 38.487083435058594, 49.75880432128906, 52.70049285888672, 47.129676818847656, 11.594144821166992, -2.6253204345703125, 46.96192169189453, -6.927604675292969, -10.169181823730469, 6.209384918212891, 23.93191909790039, 59.1053466796875, 25.195812225341797, 3.0030288696289062, 34.34832763671875, 20.101333618164062, 54.16249084472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000493.npy"} +{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 35.46459197998047, "std": 30.0831356048584, "min": -25.38532257080078, "p10": -1.355794525146484, "median": 30.03800678253174, "p90": 74.9020278930664, "max": 99.82002258300781, "pos_frac": 0.859375, "sample": [-0.9773292541503906, 46.87248992919922, 21.327774047851562, 73.92727661132812, 68.33971405029297, 59.16770935058594, -17.36682891845703, 99.82002258300781, 26.61577796936035, 7.576333999633789, 50.076148986816406, 28.752010345458984, -11.480171203613281, 56.08973693847656, 28.871610641479492, 75.31977844238281, 60.06745147705078, -1.6765213012695312, 53.5321044921875, 86.25969696044922, 4.352985382080078, -25.38532257080078, 24.627548217773438, 40.094757080078125, 44.22666931152344, 21.674007415771484, 62.24793243408203, 83.37274932861328, 33.5682487487793, 22.556758880615234, 24.751075744628906, -5.394439697265625, 36.86428451538086, 30.752975463867188, 65.3305892944336, 48.405982971191406, 68.027587890625, 17.84522247314453, 3.6258392333984375, 95.21023559570312, 72.72154998779297, -0.9648323059082031, -1.5179939270019531, 13.097129821777344, 10.704559326171875, 29.32303810119629, 58.727386474609375, 37.83528137207031, 53.934486389160156, 21.963726043701172, 12.495645523071289, -12.105926513671875, 95.806640625, 40.171695709228516, 54.378936767578125, 19.658740997314453, 86.58111572265625, 18.62970733642578, 38.649620056152344, 54.46171569824219, 3.4959335327148438, 25.459064483642578, 24.114105224609375, 4.2419891357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000494.npy"} +{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 32.37921142578125, "std": 27.76520347595215, "min": -36.897186279296875, "p10": 0.625724029541016, "median": 31.174007415771484, "p90": 63.72550811767579, "max": 107.20719909667969, "pos_frac": 0.90625, "sample": [55.21271514892578, 2.629617691040039, 77.06730651855469, 52.84123229980469, 1.0661239624023438, 18.84208106994629, -8.83721923828125, 6.549224853515625, 45.96910095214844, 12.242904663085938, 19.83422088623047, 38.19731140136719, 19.16785430908203, -9.578550338745117, 51.05413818359375, 10.16252326965332, -8.27165412902832, 67.45836639404297, 33.687713623046875, 46.025848388671875, 19.92080307006836, -36.897186279296875, 38.19865036010742, 9.786418914794922, 31.487266540527344, 94.31887817382812, 58.44585418701172, -8.095539093017578, 49.624534606933594, 30.860748291015625, 34.296600341796875, 62.678466796875, 13.781726837158203, 9.780227661132812, 38.23194122314453, 107.20719909667969, 53.331939697265625, 8.24612808227539, 49.00157165527344, 58.759552001953125, 52.048988342285156, 46.458213806152344, 15.496315002441406, 22.757659912109375, 20.72562026977539, 38.964447021484375, -9.958419799804688, 18.78436279296875, 27.801942825317383, 0.436981201171875, 12.063621520996094, 10.761848449707031, 26.67877197265625, 52.708282470703125, 62.518768310546875, 33.703277587890625, 16.597793579101562, 37.972721099853516, 29.45740509033203, 58.01512145996094, 64.17424011230469, 96.98150634765625, 66.98406982421875, 15.849266052246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000495.npy"} +{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 28.696502685546875, "std": 25.869693756103516, "min": -29.316619873046875, "p10": -0.26025428771972525, "median": 24.60771369934082, "p90": 63.10016708374024, "max": 99.02214813232422, "pos_frac": 0.890625, "sample": [23.393035888671875, 2.3224639892578125, 56.670166015625, 63.58998107910156, 16.09039306640625, 84.02322387695312, 24.8497314453125, 35.809051513671875, 40.86289596557617, 14.97332763671875, 41.99119186401367, 1.0203170776367188, 22.63665771484375, 21.416366577148438, 71.87700653076172, 10.683917999267578, 13.099594116210938, 28.057044982910156, 23.982959747314453, -8.714065551757812, 30.732391357421875, 60.18950653076172, 75.95254516601562, 36.28882598876953, 99.02214813232422, 47.58735275268555, 9.002655029296875, 56.55763244628906, 25.71527862548828, -1.241607666015625, 19.759933471679688, 14.612405776977539, 6.697906494140625, 24.36569595336914, 13.444602966308594, -29.316619873046875, 20.553131103515625, 13.614471435546875, 9.337287902832031, 17.431312561035156, -0.8090705871582031, 18.20201301574707, 32.25717544555664, -16.39122772216797, 31.078109741210938, 63.680320739746094, 33.77641296386719, 46.194236755371094, 90.55638122558594, 61.95726776123047, 4.0688323974609375, 4.17132568359375, 38.392311096191406, 40.764556884765625, 28.092506408691406, 43.084564208984375, 37.97726058959961, 47.13264465332031, 7.582736968994141, -5.5378265380859375, 58.93152618408203, 30.864437103271484, 6.783912658691406, -5.176307678222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000496.npy"} +{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 28.408655166625977, "std": 28.08185577392578, "min": -20.001129150390625, "p10": -2.7525682449340816, "median": 20.523695945739746, "p90": 70.17958908081057, "max": 88.09044647216797, "pos_frac": 0.859375, "sample": [4.004306793212891, 64.66168975830078, 15.509674072265625, 60.044708251953125, 3.327392578125, 14.804443359375, 83.71427917480469, 0.7346649169921875, 9.403512954711914, 2.332155227661133, 17.016769409179688, 58.123931884765625, 83.77119445800781, 30.64307403564453, 9.543731689453125, -20.001129150390625, 88.09044647216797, 56.58506774902344, 36.43804168701172, 21.396453857421875, 17.947799682617188, -2.9765090942382812, 75.60181427001953, 5.345069885253906, -4.3372344970703125, 19.366056442260742, 11.049484252929688, -9.784271240234375, 31.966445922851562, -1.8145637512207031, 59.12571716308594, 30.23113250732422, 14.554996490478516, 59.15070343017578, 22.503080368041992, 80.03202819824219, 10.626922607421875, 72.54440307617188, 23.638614654541016, 4.874076843261719, 45.427635192871094, 42.460411071777344, -4.995687484741211, 78.76995849609375, 13.08676528930664, 19.650938034057617, 50.794891357421875, 5.6862945556640625, 50.32377624511719, 42.12799835205078, 54.63985061645508, 9.173194885253906, 31.495075225830078, 43.22930908203125, 6.420316696166992, 19.177276611328125, 4.7382659912109375, 62.53934860229492, -2.230039596557617, 24.10149383544922, 53.037689208984375, -19.76739501953125, 38.09980010986328, -9.62343978881836], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000497.npy"} +{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 30.984760284423828, "std": 27.986759185791016, "min": -20.92620849609375, "p10": -2.9834363937377923, "median": 25.839611053466797, "p90": 68.45401458740234, "max": 109.10934448242188, "pos_frac": 0.875, "sample": [15.457342147827148, 38.75053024291992, 9.097450256347656, 4.641357421875, 59.561485290527344, 20.625991821289062, 68.24392700195312, 68.54405212402344, 40.64905548095703, 25.83716583251953, 25.159984588623047, 21.65222930908203, 29.666763305664062, 56.64368438720703, 10.576309204101562, 72.68724060058594, 37.054229736328125, 54.430320739746094, 19.7093505859375, 85.95889282226562, 18.400650024414062, 21.923690795898438, -14.520599365234375, -4.452384948730469, -20.92620849609375, 34.12049102783203, 5.54876708984375, 8.326377868652344, 27.645713806152344, 23.652969360351562, 21.534807205200195, 25.681852340698242, -16.1361083984375, 101.69866180419922, 6.825920104980469, -3.242156982421875, 4.053779602050781, 16.340652465820312, 38.82582092285156, 69.46064758300781, 109.10934448242188, 22.309547424316406, 38.43145751953125, 40.09852981567383, 84.95881652832031, 61.853111267089844, 2.1374969482421875, 43.95263671875, 43.992366790771484, 45.92017364501953, 38.977569580078125, 10.695598602294922, 27.1761474609375, 38.82014465332031, -11.546066284179688, 10.173309326171875, 51.21703338623047, 21.74225616455078, 64.59004974365234, 25.842056274414062, -2.3797550201416016, 51.940765380859375, 39.929534912109375, -6.628213882446289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000498.npy"} +{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 31.55392837524414, "std": 30.621047973632812, "min": -47.15885925292969, "p10": -1.491476058959961, "median": 31.013694763183594, "p90": 67.76062240600587, "max": 111.42703247070312, "pos_frac": 0.859375, "sample": [67.64329528808594, 42.10066604614258, 30.991065979003906, -10.106546401977539, 15.432365417480469, 39.42071533203125, 50.326019287109375, 35.44099426269531, -0.8167037963867188, -11.39813232421875, 6.650976181030273, 67.81090545654297, 3.9622058868408203, 81.96533203125, 18.070350646972656, 60.574440002441406, 42.18043518066406, 33.353004455566406, 111.42703247070312, 41.97468566894531, 40.16954040527344, 24.134532928466797, 59.57000732421875, 2.5413436889648438, 9.488067626953125, 33.74554443359375, 49.29901885986328, 59.71903991699219, -1.4327564239501953, 31.492759704589844, 18.22763442993164, 0.2719879150390625, 64.48238372802734, 23.03594970703125, 22.058273315429688, 36.52838897705078, 28.838211059570312, 63.36293029785156, 31.03632354736328, -47.15885925292969, 11.305997848510742, 27.973854064941406, 16.018829345703125, 81.3860092163086, 51.282859802246094, 27.644134521484375, 69.5053939819336, 13.047859191894531, 1.3800582885742188, 43.858150482177734, 26.948951721191406, 77.98101806640625, 27.71825408935547, 65.32522583007812, 5.611019134521484, 17.31987762451172, 47.2833251953125, 39.01500701904297, -33.630035400390625, -17.66172218322754, -4.916316986083984, 39.48097229003906, -1.516641616821289, 110.67579650878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000499.npy"} +{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 33.82952117919922, "std": 29.376726150512695, "min": -19.078529357910156, "p10": 2.8274232864379893, "median": 26.33731746673584, "p90": 66.26198883056642, "max": 137.1235809326172, "pos_frac": 0.90625, "sample": [3.9797420501708984, 17.152660369873047, 14.567955017089844, 137.1235809326172, 30.962913513183594, 61.61417770385742, 68.01683044433594, 71.74461364746094, 20.531116485595703, 32.89311981201172, 62.1673583984375, 23.032379150390625, 24.621337890625, 33.05024337768555, -0.6707820892333984, 6.118408203125, 24.375457763671875, -0.5758056640625, 24.133304595947266, 30.456451416015625, 88.75797271728516, -10.039382934570312, 30.705673217773438, 46.69971466064453, 19.141525268554688, 121.65202331542969, 9.687431335449219, 62.07020568847656, 44.81761932373047, -19.078529357910156, 18.949493408203125, 88.69021606445312, 29.407028198242188, 16.706480026245117, 23.504966735839844, 47.68405532836914, 30.383121490478516, 55.95105743408203, 35.69287872314453, 9.5081787109375, 28.05329704284668, 49.010501861572266, 12.304328918457031, 59.133453369140625, 17.096092224121094, -1.9416255950927734, 44.35022735595703, 24.005760192871094, 2.3335723876953125, 46.49052429199219, 101.77967834472656, 13.779075622558594, 39.43646240234375, 14.510360717773438, 48.18316650390625, 38.86479949951172, 19.66387939453125, 17.66021728515625, 32.65834045410156, 24.580339431762695, -3.5523815155029297, 58.54961395263672, 20.12880516052246, 21.823974609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000500.npy"} +{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 37.292991638183594, "std": 29.057201385498047, "min": -12.509441375732422, "p10": 5.631687164306641, "median": 34.424964904785156, "p90": 73.73782653808594, "max": 128.81259155273438, "pos_frac": 0.9375, "sample": [19.583633422851562, 38.318668365478516, 91.64916229248047, 5.6201629638671875, 41.5474739074707, 48.25801086425781, 9.819507598876953, 75.6893539428711, 43.12532043457031, 2.3806533813476562, 47.800071716308594, 9.921710968017578, 58.69822692871094, 16.669090270996094, 128.81259155273438, 23.16376495361328, 76.69480895996094, 18.21782684326172, 31.85251808166504, 74.1844253540039, 60.836097717285156, 47.64635467529297, 6.3021240234375, 19.6623477935791, 56.967742919921875, 12.837387084960938, 27.820297241210938, 43.0584716796875, 66.62910461425781, 18.346145629882812, -1.9744338989257812, 29.30279541015625, 4.102943420410156, 20.617794036865234, 34.499847412109375, 123.46360778808594, 23.510498046875, 44.1503791809082, 37.252418518066406, 50.91841125488281, 57.68308639526367, 47.44666290283203, 26.8873348236084, 9.706588745117188, 64.82002258300781, 70.51504516601562, -12.509441375732422, 34.35008239746094, 31.165714263916016, -3.0836410522460938, 27.36212158203125, 18.436447143554688, 9.628849029541016, -11.867149353027344, 5.658576965332031, 10.589637756347656, 44.463233947753906, 38.345726013183594, 73.891845703125, 48.17694091796875, 54.163360595703125, 67.63566589355469, 73.37844848632812, 11.949151992797852], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000501.npy"} +{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 33.75102996826172, "std": 21.826812744140625, "min": -11.148231506347656, "p10": 3.6179193496704127, "median": 37.08907127380371, "p90": 59.08996772766114, "max": 102.76698303222656, "pos_frac": 0.9375, "sample": [47.29993438720703, -3.6458473205566406, 2.082387924194336, 59.803958892822266, 26.163436889648438, 39.12519073486328, -8.536262512207031, 10.7310791015625, 68.33824157714844, 37.62247085571289, 40.07798767089844, 60.02720642089844, 49.445777893066406, 33.327903747558594, 56.22489929199219, -5.563621520996094, 27.961647033691406, 2.5184688568115234, 14.77801513671875, 20.00201416015625, 42.73473358154297, 29.892475128173828, 35.00914001464844, 42.263832092285156, 7.22613525390625, 45.78353500366211, 27.749534606933594, 7.882083892822266, 66.44778442382812, 39.09822463989258, 43.669189453125, 102.76698303222656, 6.1833038330078125, 16.496089935302734, 51.255409240722656, 47.42534637451172, 42.29156494140625, 42.874168395996094, -11.148231506347656, 19.39750862121582, 39.662193298339844, 49.024574279785156, 37.988311767578125, 49.606201171875, 51.421653747558594, 57.423988342285156, 61.024818420410156, 19.1512393951416, 37.83381652832031, 15.02423095703125, 51.6343994140625, 25.401641845703125, 36.55567169189453, 12.302703857421875, 73.91924285888672, 12.161067962646484, 37.762908935546875, 20.306243896484375, 54.756256103515625, 32.20429229736328, 36.33422088623047, 35.11738204956055, 0.987335205078125, 29.377853393554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000502.npy"} +{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 34.12990188598633, "std": 23.525371551513672, "min": -16.68000030517578, "p10": 0.8879741668701182, "median": 35.727752685546875, "p90": 60.31672592163086, "max": 77.80168151855469, "pos_frac": 0.90625, "sample": [9.478763580322266, -7.293657302856445, 59.579994201660156, 22.21295166015625, 21.664840698242188, -10.64013671875, 69.41532897949219, 35.7420654296875, 1.8334617614746094, 19.57114028930664, 12.450305938720703, 48.12989044189453, 41.053733825683594, 30.385772705078125, 74.78306579589844, 57.2659912109375, 38.161930084228516, 15.357574462890625, 38.66217803955078, 59.63584899902344, 58.05607604980469, 44.85984420776367, 29.438278198242188, 24.735153198242188, 61.528297424316406, 51.89849853515625, -16.68000030517578, 59.534629821777344, 12.880090713500977, 35.48957824707031, 56.45111846923828, -2.70501708984375, 8.91461181640625, 28.986968994140625, -3.829425811767578, 35.71343994140625, 57.3338623046875, 42.597259521484375, 49.66441345214844, 52.42658233642578, 0.48276519775390625, 34.09876251220703, 51.02409362792969, 48.05316925048828, -8.523887634277344, 33.669921875, 21.118196487426758, 52.85521697998047, 62.88164520263672, 59.92132568359375, 34.608856201171875, 22.100730895996094, 60.486183166503906, 77.80168151855469, 71.68942260742188, 18.57867431640625, 16.759288787841797, 37.98594665527344, 17.893653869628906, 2.5509796142578125, 2.5852737426757812, 45.89768981933594, 54.057708740234375, 40.991119384765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000503.npy"} +{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 39.98591613769531, "std": 26.27921485900879, "min": -12.201004028320312, "p10": 6.993256759643557, "median": 35.44336128234863, "p90": 75.76621398925782, "max": 111.91203308105469, "pos_frac": 0.953125, "sample": [8.82489013671875, 23.63416290283203, 25.33990478515625, 26.203292846679688, 18.726577758789062, 64.3951416015625, 59.949562072753906, 2.962646484375, 96.4140625, 82.9815444946289, 111.91203308105469, 19.341224670410156, 41.76519775390625, 39.602394104003906, 2.677581787109375, 34.27581024169922, 83.46216583251953, 30.420196533203125, 28.691627502441406, 32.17399597167969, 71.70365142822266, 40.69127655029297, 44.39668273925781, 60.47285461425781, 43.07776641845703, -1.1373405456542969, 26.254592895507812, 37.41693878173828, 36.29045486450195, 33.60087585449219, -12.201004028320312, 82.6810302734375, 35.6701545715332, 59.50862503051758, 21.98892593383789, 29.213638305664062, 58.27667236328125, 48.98883056640625, -0.4891242980957031, 46.57587432861328, 30.36260986328125, 99.626708984375, 19.03050994873047, 6.208271026611328, 75.30641174316406, 23.310028076171875, 24.931550979614258, 12.416610717773438, 33.003570556640625, 40.08650207519531, 42.37748718261719, 35.21656799316406, 75.96327209472656, 67.49308776855469, 10.92940902709961, 34.71929931640625, 31.326431274414062, 53.098487854003906, 65.47596740722656, 17.479217529296875, 46.244590759277344, 74.99351501464844, 37.60223388671875, 5.161102294921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000504.npy"} +{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 28.277128219604492, "std": 29.574512481689453, "min": -35.14299011230469, "p10": -1.4594322204589836, "median": 20.640634536743164, "p90": 71.42545623779299, "max": 103.718994140625, "pos_frac": 0.84375, "sample": [11.36932373046875, 74.81355285644531, 11.556060791015625, 63.4846305847168, -0.2545318603515625, 103.718994140625, 0.97772216796875, 20.51763153076172, 67.26556396484375, 18.505386352539062, 60.627342224121094, 46.30567932128906, 86.07691192626953, -1.7623138427734375, 14.360527038574219, 46.2489013671875, 18.43444061279297, 26.025909423828125, 38.968902587890625, 1.7695350646972656, 21.120935440063477, 73.20826721191406, 38.461700439453125, 11.83819580078125, -21.593902587890625, 45.89923858642578, -12.87490463256836, 42.03693389892578, 58.07682800292969, 76.96446990966797, 8.551807403564453, 35.95436096191406, 37.6668701171875, -17.924270629882812, 20.76363754272461, 58.88081359863281, 38.05738067626953, 15.625646591186523, 6.088033676147461, 0.7597064971923828, 24.90888786315918, -35.14299011230469, 9.132549285888672, 42.314231872558594, 11.963531494140625, 16.27521514892578, 60.47735595703125, 58.70671844482422, 27.813739776611328, 94.1077880859375, 25.367816925048828, 1.7012100219726562, -0.4291725158691406, 79.94389343261719, 58.12210464477539, -0.7527084350585938, 11.887447357177734, 7.445770263671875, 19.60906982421875, 17.650848388671875, 27.400314331054688, -6.172832489013672, 12.8038330078125, -1.9702777862548828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000505.npy"} +{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 33.98353576660156, "std": 29.518352508544922, "min": -16.371824264526367, "p10": -0.021889114379882113, "median": 30.14310073852539, "p90": 77.02998962402344, "max": 131.52120971679688, "pos_frac": 0.890625, "sample": [-12.327789306640625, 10.306350708007812, 32.401451110839844, 21.172805786132812, 24.12906265258789, 44.06566619873047, 0.6538887023925781, 43.684478759765625, 81.68519592285156, 17.94295883178711, 50.346126556396484, 25.603469848632812, 18.096343994140625, 37.09062957763672, 9.0521240234375, 36.7139892578125, 46.703853607177734, 22.823394775390625, 31.203346252441406, 36.195350646972656, 15.996002197265625, 89.85189056396484, 94.57049560546875, 30.34522247314453, 131.52120971679688, -1.6149024963378906, 21.037986755371094, 58.211456298828125, 36.6644287109375, -12.093513488769531, 42.96104049682617, 24.456117630004883, 65.51148223876953, 21.01932144165039, 30.467208862304688, -10.538627624511719, 71.94772338867188, 67.95333862304688, 18.452972412109375, 91.4810791015625, 53.18119812011719, 51.20403289794922, 29.921859741210938, 12.127029418945312, -16.371824264526367, 16.07740592956543, 35.7037353515625, 31.083656311035156, 74.45025634765625, 58.252410888671875, 21.273237228393555, 4.227821350097656, 29.94097900390625, 41.09191131591797, 78.13558959960938, 85.55561828613281, 3.677154541015625, -1.9102516174316406, 26.722457885742188, 4.874725341796875, -0.3115081787109375, 29.472183227539062, 36.467811584472656, 4.3540496826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000506.npy"} +{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 31.98436737060547, "std": 22.67708396911621, "min": -2.321441650390625, "p10": 1.0112215042114259, "median": 32.28876304626465, "p90": 59.337502288818364, "max": 112.11190795898438, "pos_frac": 0.9375, "sample": [9.680784225463867, 1.0511703491210938, 1.3469619750976562, 28.948890686035156, 63.78120422363281, 44.70896911621094, 22.751373291015625, 45.34777069091797, -1.0498504638671875, 56.161773681640625, 36.7554931640625, 44.953834533691406, 19.691864013671875, 45.13294219970703, 40.16998291015625, 28.907806396484375, 6.340568542480469, 30.312042236328125, 33.931427001953125, 112.11190795898438, 6.79949951171875, 31.570480346679688, 0.6943302154541016, 43.204307556152344, 43.047584533691406, 51.518402099609375, 31.474822998046875, 36.26971435546875, 3.883289337158203, 44.06683349609375, 33.00704574584961, 23.426612854003906, -2.300994873046875, -0.9668979644775391, 40.448822021484375, 74.88475036621094, 18.76498794555664, 64.57550811767578, 40.791526794433594, 19.736618041992188, 63.96760559082031, 5.6130218505859375, 50.17244338989258, 27.11248016357422, 59.963775634765625, 24.52239227294922, 19.36821746826172, 38.53321838378906, 40.93724822998047, 51.208404541015625, 33.9902229309082, 22.174848556518555, 0.9941005706787109, 51.423622131347656, 26.050033569335938, 57.876197814941406, 47.023193359375, 65.4707260131836, -2.321441650390625, 0.05756950378417969, 6.5009918212890625, 11.261062622070312, 19.286340713500977, 49.87921142578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000507.npy"} +{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 32.10009002685547, "std": 28.83991241455078, "min": -20.232826232910156, "p10": -0.9938760757446289, "median": 26.939682960510254, "p90": 64.65440368652344, "max": 108.2960433959961, "pos_frac": 0.84375, "sample": [-2.1569366455078125, 23.40594482421875, -20.232826232910156, 17.698280334472656, 26.311309814453125, -0.37255859375, 0.9139556884765625, 98.24627685546875, 49.921142578125, 49.88561248779297, 50.085784912109375, 21.9498291015625, 40.765586853027344, 49.2547721862793, 67.50978088378906, 45.466583251953125, 60.288333892822266, 41.35725402832031, 108.2960433959961, 47.060089111328125, 6.862216949462891, 38.12901306152344, 24.34084701538086, 42.57593536376953, 61.10919189453125, 57.14118957519531, 7.4362640380859375, 3.5545654296875, 8.645009994506836, 20.069480895996094, -0.3996429443359375, 28.080604553222656, 3.8154144287109375, -1.0213069915771484, 100.97149658203125, -2.3558921813964844, 20.273963928222656, 29.76982307434082, 44.220428466796875, 15.1795654296875, 32.36329650878906, 59.87969207763672, 12.7662353515625, 9.822235107421875, 57.28532409667969, 27.21826171875, 26.661104202270508, 7.628501892089844, 55.07366180419922, 63.088600158691406, 81.17657470703125, 28.785545349121094, -1.0844383239746094, 55.84937286376953, 65.3254623413086, 23.69152069091797, -0.92987060546875, 12.495834350585938, 17.924667358398438, 84.21636199951172, -15.39068603515625, 13.922874450683594, -6.861623764038086, 59.44469451904297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000508.npy"} +{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 32.780052185058594, "std": 26.028345108032227, "min": -23.97222900390625, "p10": 2.45704517364502, "median": 30.99527645111084, "p90": 62.87817382812501, "max": 108.38638305664062, "pos_frac": 0.90625, "sample": [2.3123397827148438, 13.495193481445312, 21.4609375, 51.843353271484375, 20.454742431640625, 79.91510009765625, 21.41468048095703, 24.993560791015625, 61.25025939941406, 44.082794189453125, 55.58235168457031, 25.799278259277344, 29.389434814453125, 39.336219787597656, 12.508132934570312, 2.7946910858154297, 108.38638305664062, 4.793121337890625, -15.678466796875, 27.740440368652344, 86.517822265625, 34.31638717651367, 30.662887573242188, 21.175018310546875, 27.181961059570312, 12.672359466552734, 33.77744674682617, 29.50914764404297, 44.930931091308594, 55.131744384765625, 34.74280548095703, -9.199563980102539, 16.10946273803711, -21.58346176147461, -10.937362670898438, 70.10484313964844, 36.657684326171875, 54.326629638671875, 21.200828552246094, 9.377593994140625, 6.360073089599609, 49.378173828125, 7.2280426025390625, 40.36509704589844, 14.26641845703125, -23.97222900390625, 63.57585144042969, 57.476165771484375, 46.222564697265625, 64.74705505371094, -1.9930877685546875, 31.327665328979492, 51.86723709106445, 17.049671173095703, 48.74406433105469, 50.41056442260742, 39.111480712890625, 24.84423828125, 24.844776153564453, 38.16645812988281, 69.44758605957031, 56.099510192871094, 53.33493423461914, 60.47136688232422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000509.npy"} +{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 40.207786560058594, "std": 32.402069091796875, "min": -11.364791870117188, "p10": 0.1764179229736332, "median": 39.43504333496094, "p90": 78.62383346557617, "max": 115.10769653320312, "pos_frac": 0.90625, "sample": [-7.892448425292969, 23.740379333496094, 73.7044448852539, 53.80454635620117, 13.735023498535156, -7.480438232421875, 106.85943603515625, 46.05711364746094, 68.86537170410156, 70.75001525878906, 78.76160430908203, 15.033088684082031, 0.5559673309326172, 18.97899627685547, 71.46458435058594, 51.39504623413086, 48.67774963378906, -11.364791870117188, -6.3545379638671875, 71.69535064697266, 39.88658142089844, 27.74762725830078, 28.187026977539062, 5.37774658203125, 15.6787109375, 36.72275924682617, 61.99129104614258, 111.8809585571289, 67.6192626953125, 1.094696044921875, 115.10769653320312, 51.237857818603516, 13.629035949707031, 42.23268127441406, 12.80963134765625, 66.25727081298828, 41.51094055175781, 3.0330429077148438, 7.413166046142578, 25.136474609375, 19.743515014648438, 29.023269653320312, 78.3023681640625, 38.183074951171875, -5.122016906738281, -2.4141921997070312, 51.7579231262207, 42.82836151123047, 37.95249938964844, 76.28254699707031, 100.18867492675781, 7.507711410522461, 58.61114501953125, 84.78402709960938, 50.37554931640625, 57.95751190185547, 51.02366638183594, 55.77880859375, 0.013753890991210938, 14.862594604492188, 38.98350524902344, 101.43486022949219, 14.844329833984375, 14.853923797607422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000510.npy"} +{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 36.059539794921875, "std": 29.902868270874023, "min": -14.783889770507812, "p10": -1.6879892349243157, "median": 35.783119201660156, "p90": 77.58738403320312, "max": 100.53355407714844, "pos_frac": 0.875, "sample": [5.17132568359375, 78.69699096679688, -1.1024551391601562, 25.334915161132812, 15.183954238891602, 45.81428527832031, 38.98695373535156, 73.59318542480469, 18.211212158203125, 38.571998596191406, 64.6230239868164, 15.014785766601562, 100.53355407714844, 35.05889892578125, -1.9389324188232422, 77.73233032226562, 3.595123291015625, 69.28754425048828, 13.96826171875, 77.17489624023438, 41.1595458984375, 56.52355194091797, 9.448265075683594, 23.360862731933594, 37.311302185058594, 34.617225646972656, -14.783889770507812, 85.41698455810547, -13.543685913085938, 64.520263671875, 67.22671508789062, 34.30671691894531, -6.632173538208008, 37.77081298828125, 57.64765548706055, 53.14459991455078, 95.4093246459961, 48.044227600097656, 3.295074462890625, 26.534975051879883, 57.77561950683594, 36.6175537109375, 33.21291732788086, 21.958656311035156, -5.0861053466796875, 77.24917602539062, -6.182624816894531, 0.5903148651123047, -3.4741878509521484, 36.50733947753906, 0.32160186767578125, 22.953575134277344, 55.122596740722656, 39.51692199707031, 53.249847412109375, 8.343048095703125, 29.608917236328125, 6.527076721191406, 99.55242919921875, 25.489219665527344, 13.575836181640625, 39.11906433105469, 44.39778518676758, 86.57389831542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000511.npy"} +{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 35.32998275756836, "std": 27.8614559173584, "min": -3.448699951171875, "p10": 3.6228862762451177, "median": 29.20815658569336, "p90": 76.27186431884768, "max": 117.85086059570312, "pos_frac": 0.9375, "sample": [81.75704956054688, 63.86602783203125, 28.29351806640625, 30.646286010742188, 4.0064544677734375, -2.4411773681640625, 43.865509033203125, 32.1419677734375, 5.789422988891602, 20.951467514038086, 18.777395248413086, 41.90167236328125, 1.3076648712158203, 52.15126419067383, -3.448699951171875, 21.01114273071289, 47.57987976074219, 48.98760223388672, 23.708602905273438, -2.465242385864258, 13.599800109863281, 78.78897857666016, -2.2796154022216797, 62.526695251464844, 117.85086059570312, 32.15283203125, 6.867343902587891, 27.228408813476562, 19.282455444335938, 20.97858428955078, 38.56000518798828, 53.487632751464844, 26.825971603393555, 54.631072998046875, 15.02414321899414, 51.4285888671875, 11.53483772277832, 20.536155700683594, 17.93750762939453, 58.65208435058594, 62.05464172363281, 31.26263427734375, 51.192806243896484, 86.79833984375, 4.092231750488281, 34.7535400390625, 46.085533142089844, 16.354997634887695, 4.8671417236328125, 70.39859771728516, 50.574554443359375, 6.1942291259765625, 80.46128845214844, 25.257083892822266, 30.12279510498047, 17.03343963623047, 54.266563415527344, 0.8277664184570312, 27.717437744140625, 25.3350830078125, 3.4584999084472656, 103.30435180664062, 95.08683013916016, 49.616363525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000512.npy"} +{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 30.298439025878906, "std": 26.354843139648438, "min": -46.20642852783203, "p10": -2.846168518066406, "median": 31.060245513916016, "p90": 63.91311721801758, "max": 91.74008178710938, "pos_frac": 0.859375, "sample": [35.78162384033203, 15.680229187011719, 82.43345642089844, -2.995840072631836, 33.517578125, 39.438865661621094, 9.534614562988281, 35.51613998413086, -7.7887420654296875, 66.12124633789062, 27.524768829345703, 9.83416748046875, 47.04955291748047, 31.51494789123535, 3.1854190826416016, 63.61723327636719, 30.60554313659668, 8.024127960205078, 43.72584533691406, 62.21363830566406, 31.587631225585938, 45.392059326171875, 69.48556518554688, 91.74008178710938, 41.644325256347656, 8.775508880615234, -5.333591461181641, 24.96595001220703, 49.21669006347656, 29.015247344970703, 43.635711669921875, 7.936140060424805, 49.74700164794922, 52.68701171875, 28.358070373535156, 9.9727783203125, 46.69037628173828, -5.29931640625, 33.02510070800781, 84.24874877929688, 14.247661590576172, 12.714698791503906, 52.19847869873047, -15.793716430664062, 76.82740783691406, -0.7635498046875, 6.478057861328125, 10.825933456420898, 38.072486877441406, 39.60902404785156, 26.43572235107422, -2.4969348907470703, 64.03992462158203, 21.021743774414062, -6.966926574707031, 28.695419311523438, 13.454851150512695, 26.024404525756836, 35.075035095214844, 46.88087844848633, -46.20642852783203, 56.182861328125, 21.90264129638672, 48.61500549316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000513.npy"} +{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 30.922880172729492, "std": 33.895538330078125, "min": -26.20556640625, "p10": -9.425224304199219, "median": 25.68124771118164, "p90": 74.94202957153321, "max": 136.52645874023438, "pos_frac": 0.828125, "sample": [-9.330215454101562, -3.275360107421875, 13.004661560058594, -17.358842849731445, 29.3748779296875, 26.138328552246094, 46.89716720581055, 1.270721435546875, 96.9827880859375, 64.9616928100586, 4.34825325012207, 43.65190887451172, -3.1593246459960938, 42.905731201171875, 42.42588806152344, 25.224166870117188, 26.588764190673828, 29.073522567749023, 43.54106521606445, 96.85445404052734, -13.523824691772461, 22.719642639160156, 47.934654235839844, 4.10807991027832, 13.564697265625, -10.8663330078125, 13.56365966796875, -9.695228576660156, 56.207557678222656, 30.297080993652344, 14.641935348510742, 66.1622085571289, 31.52740478515625, 40.442955017089844, 16.313522338867188, 28.64452362060547, 55.696685791015625, -4.0797576904296875, 111.58362579345703, -9.4659423828125, 24.99396514892578, 4.7955474853515625, -26.20556640625, 44.661956787109375, 17.729019165039062, -14.30990219116211, 15.620903015136719, 20.453628540039062, 3.51678466796875, 63.661346435546875, 21.22014617919922, 76.3613510131836, 9.497726440429688, 61.91758728027344, 136.52645874023438, 6.095653533935547, 61.10301208496094, 17.146591186523438, 93.39039611816406, 4.890289306640625, 92.04644775390625, 27.36211395263672, 39.0611572265625, 71.63027954101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000514.npy"} +{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 33.66363525390625, "std": 27.529523849487305, "min": -12.849746704101562, "p10": 3.2429130554199226, "median": 30.475482940673828, "p90": 77.36729507446292, "max": 99.48031616210938, "pos_frac": 0.953125, "sample": [10.98550033569336, 31.06396484375, 64.0091552734375, 91.54501342773438, 15.329185485839844, 4.8730010986328125, 91.50833129882812, 50.39527893066406, 27.698135375976562, 15.443851470947266, 31.02618408203125, 21.069595336914062, 3.8018951416015625, 99.48031616210938, 60.52544403076172, 25.821693420410156, 41.761138916015625, 52.14618682861328, 54.79327392578125, 0.44628143310546875, 53.038299560546875, -2.9212207794189453, 32.62188720703125, 82.37057495117188, 30.048492431640625, 18.822853088378906, 80.01068115234375, 8.613758087158203, 29.159893035888672, 10.524389266967773, 58.27306365966797, 3.0033493041992188, 0.5748920440673828, 8.678268432617188, 48.82124328613281, 69.61231231689453, 8.824867248535156, 1.6013565063476562, 36.62367248535156, 10.141586303710938, 34.95069885253906, 43.02880096435547, 9.439502716064453, 15.883743286132812, 12.051658630371094, 18.633705139160156, 10.973060607910156, 97.52117156982422, 13.199665069580078, 6.249477386474609, 52.346778869628906, 71.19939422607422, -12.849746704101562, -1.2976818084716797, 45.68544006347656, 40.01023864746094, 41.133148193359375, 23.068695068359375, 84.7864990234375, 43.998512268066406, 23.403648376464844, 33.98646545410156, 33.99976348876953, 30.90247344970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000515.npy"} +{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 35.44856262207031, "std": 28.498876571655273, "min": -7.7464141845703125, "p10": 1.3574897766113283, "median": 30.471290588378906, "p90": 75.1834426879883, "max": 115.98857116699219, "pos_frac": 0.90625, "sample": [30.054931640625, 55.277740478515625, 21.534698486328125, 13.00051498413086, 59.380287170410156, 41.32611846923828, 16.162261962890625, -7.7464141845703125, 30.887649536132812, 44.038909912109375, 58.413597106933594, 37.03758239746094, 48.563377380371094, 44.947967529296875, 19.517578125, 4.827598571777344, 55.03026580810547, 6.67633056640625, 37.007415771484375, 61.691009521484375, -6.2743072509765625, -0.6097412109375, 16.13538360595703, 41.32500076293945, 62.32012939453125, 19.8975830078125, 1.5634384155273438, 82.68301391601562, 115.98857116699219, 15.437568664550781, 12.60833740234375, 78.58221435546875, 112.71197509765625, 46.11822509765625, 80.4120101928711, 21.610687255859375, 11.320968627929688, -2.5039520263671875, 14.282752990722656, 32.94408416748047, 50.473289489746094, -3.8668994903564453, 19.754074096679688, 13.548419952392578, 22.528339385986328, 26.17816162109375, 28.749740600585938, 41.886260986328125, 20.523033142089844, 65.55906677246094, 40.99333953857422, 66.74073791503906, 12.19573974609375, 9.766708374023438, 78.15228271484375, 20.428741455078125, -4.9461212158203125, 10.525680541992188, 50.21710205078125, 68.25614929199219, 62.1162109375, 83.9107666015625, 49.56471633911133, 1.26922607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000516.npy"} +{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 34.930450439453125, "std": 25.577362060546875, "min": -20.419769287109375, "p10": 3.9048402786254885, "median": 32.42748260498047, "p90": 66.3022674560547, "max": 99.5942611694336, "pos_frac": 0.921875, "sample": [22.93033218383789, 51.38365173339844, 30.830917358398438, 4.214210510253906, 31.965423583984375, 22.014190673828125, 29.031265258789062, 59.81985092163086, 65.94293212890625, -5.851068496704102, 40.772377014160156, 70.13655853271484, 60.13519287109375, 51.498111724853516, 46.91609191894531, 29.945701599121094, 36.06493377685547, 16.499969482421875, 28.729665756225586, 61.672325134277344, 36.38947296142578, 46.76982116699219, 46.72845458984375, 22.842910766601562, 35.009422302246094, 5.075384140014648, 26.02886199951172, 33.414100646972656, 26.1326904296875, 96.33760070800781, 13.555511474609375, 39.763938903808594, -4.315225601196289, 66.45626831054688, 14.011764526367188, 52.47541809082031, 16.6756591796875, 64.60926055908203, -20.419769287109375, -3.3677749633789062, 46.14866638183594, 28.691743850708008, 3.7722530364990234, 24.464454650878906, 38.06562805175781, 56.27385711669922, 99.5942611694336, 37.100921630859375, 16.16762924194336, 86.70309448242188, 32.88954162597656, 64.00390625, 29.986038208007812, 72.80265808105469, 23.443572998046875, 74.05563354492188, 41.973602294921875, 12.4400634765625, 5.563970565795898, 27.562454223632812, -18.533294677734375, 46.21550369262695, 1.510040283203125, 15.826179504394531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000517.npy"} +{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 31.17576789855957, "std": 26.12419319152832, "min": -19.29910659790039, "p10": -1.217963409423825, "median": 27.532428741455078, "p90": 71.54044113159179, "max": 102.85406494140625, "pos_frac": 0.890625, "sample": [19.22848129272461, 72.41767883300781, 27.754989624023438, 50.71565628051758, 48.60365295410156, 21.649864196777344, -5.369773864746094, 34.35527801513672, 25.243633270263672, 15.387985229492188, 43.669803619384766, 25.177650451660156, 31.767196655273438, 16.95319366455078, 47.34297180175781, 40.04914093017578, 47.378135681152344, 18.34192657470703, 12.975616455078125, 43.78961181640625, 61.93394470214844, -2.5654525756835938, -19.29910659790039, 23.77185821533203, -5.135501861572266, 42.276123046875, 82.00175476074219, 27.30986785888672, 12.434764862060547, 97.48042297363281, 72.4111557006836, 1.926177978515625, 71.34459686279297, 39.73957443237305, 12.812454223632812, 18.674346923828125, 32.48808670043945, 22.774688720703125, 49.14495849609375, 61.19282150268555, 5.040290832519531, 18.277984619140625, 28.004737854003906, 18.127052307128906, 12.359079360961914, 28.673675537109375, 6.029838562011719, -11.346675872802734, 31.45902442932129, 35.04167175292969, 4.1465911865234375, 11.139644622802734, -8.160003662109375, 40.71464538574219, -7.767704010009766, 75.47308349609375, 23.87253761291504, 39.2694091796875, 11.215518951416016, 102.85406494140625, 43.96569061279297, 19.84161376953125, 55.242645263671875, 71.62437438964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000518.npy"} +{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 32.517459869384766, "std": 30.833126068115234, "min": -20.79126739501953, "p10": 3.587921142578125, "median": 26.6220703125, "p90": 73.58830490112307, "max": 138.8477020263672, "pos_frac": 0.9375, "sample": [10.852188110351562, 39.040245056152344, 24.118816375732422, 8.246513366699219, 96.37799072265625, 6.1593017578125, 65.62688446044922, 43.49481201171875, 90.23954772949219, 36.64691162109375, 43.59516906738281, 67.95453643798828, -10.433612823486328, 76.74679565429688, 45.07014465332031, 1.1853828430175781, 124.0179443359375, 28.411117553710938, 28.683578491210938, 42.20709991455078, 58.79296875, 8.97601318359375, 23.230865478515625, 13.1365966796875, 7.519683837890625, 35.00830078125, 34.04833984375, 16.699615478515625, 15.652397155761719, -7.351984024047852, 6.3131103515625, 76.00277709960938, 44.161346435546875, 8.245346069335938, 33.876495361328125, 62.87714385986328, 24.746994018554688, 7.996635437011719, 10.996849060058594, 29.443870544433594, 21.989395141601562, 18.897964477539062, 52.90837097167969, 5.33013916015625, -2.7817344665527344, 57.37422180175781, 81.25662231445312, 9.286865234375, 22.23419189453125, 37.27259826660156, 42.738922119140625, 138.8477020263672, 3.8423728942871094, 60.87306213378906, 3.5546875, 45.451416015625, 26.958175659179688, 3.66546630859375, 12.986320495605469, 26.285964965820312, 1.6796340942382812, 18.021930694580078, 34.619842529296875, -20.79126739501953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000519.npy"} +{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 32.879398345947266, "std": 25.705875396728516, "min": -15.548225402832031, "p10": 4.343635368347168, "median": 30.304351806640625, "p90": 72.46442718505861, "max": 95.61735534667969, "pos_frac": 0.921875, "sample": [40.872169494628906, 30.113449096679688, 92.1671142578125, 10.233823776245117, 44.50389099121094, 41.669803619384766, 42.167625427246094, 17.161165237426758, 22.390830993652344, 30.495254516601562, -0.3066864013671875, -2.3740234375, 73.86990356445312, 4.3262786865234375, 4.384134292602539, 15.282188415527344, 62.615455627441406, 95.61735534667969, 32.77423095703125, 12.081661224365234, 59.649200439453125, 88.01513671875, -15.548225402832031, 22.921493530273438, 14.216140747070312, 63.39011001586914, 36.7947883605957, 80.75695037841797, 33.536041259765625, 37.45073318481445, 19.840652465820312, 22.94597625732422, 50.16166687011719, 32.748931884765625, 14.095687866210938, 39.50577163696289, 78.78091430664062, 38.478607177734375, 28.76030731201172, 40.75563049316406, 26.444171905517578, 23.095090866088867, 0.6799278259277344, 19.951171875, 29.730567932128906, 68.36168670654297, 9.489044189453125, 32.85303497314453, 81.45670318603516, -6.0802154541015625, 42.53175354003906, 58.09654235839844, 15.470321655273438, 15.186481475830078, -5.5746612548828125, 69.18498229980469, 11.048797607421875, 26.67194366455078, 31.710556030273438, 33.23712921142578, 33.36289978027344, 9.217437744140625, 16.065826416015625, 4.78814697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000520.npy"} +{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 32.02197265625, "std": 26.55714988708496, "min": -12.641510009765625, "p10": 0.9970405578613294, "median": 28.202238082885742, "p90": 71.74784317016602, "max": 99.91277313232422, "pos_frac": 0.90625, "sample": [24.372940063476562, 80.92129516601562, 35.132568359375, 10.727622985839844, -5.159379959106445, -0.6948699951171875, 26.865493774414062, 11.823944091796875, 12.065155029296875, 29.538982391357422, 16.58224868774414, -12.641510009765625, -9.477203369140625, 26.62078857421875, 55.547706604003906, 99.91277313232422, 9.5037841796875, 9.156814575195312, -0.131805419921875, 87.83233642578125, 11.484859466552734, 12.830482482910156, 20.388872146606445, 11.818216323852539, 73.33222961425781, 36.067779541015625, 46.09556579589844, 9.105583190917969, 29.539566040039062, 18.095352172851562, 44.39784240722656, 53.673553466796875, 16.690773010253906, 15.141181945800781, 2.2708053588867188, 39.847747802734375, 13.825775146484375, 34.66398620605469, 65.18222045898438, 25.43415069580078, 19.027536392211914, 53.51046371459961, 40.39649200439453, 41.15815353393555, -7.108795166015625, 34.04644775390625, 5.011798858642578, 39.78416442871094, 8.20123291015625, 11.991531372070312, 55.850982666015625, 52.57789611816406, 86.61924743652344, 32.73103332519531, 0.451141357421875, 75.76480102539062, 44.35246276855469, 66.2802505493164, 16.931407928466797, 71.03157043457031, 39.224945068359375, 69.02088928222656, 32.109519958496094, 72.05481719970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000521.npy"} +{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 27.99399185180664, "std": 26.318756103515625, "min": -13.91171646118164, "p10": 1.539336776733399, "median": 23.19931983947754, "p90": 60.11579170227051, "max": 101.2135009765625, "pos_frac": 0.90625, "sample": [13.117973327636719, 40.43242645263672, 34.295677185058594, 3.5716323852539062, 40.826080322265625, 23.02560043334961, 27.338348388671875, 25.67922019958496, 16.52548599243164, -13.91171646118164, 10.839069366455078, 16.343833923339844, 82.49423217773438, 2.5159912109375, 59.20134353637695, 12.103652954101562, 36.93929672241211, 1.3332481384277344, 36.30839538574219, 38.869483947753906, 47.319053649902344, 59.167396545410156, 43.043174743652344, 22.39299774169922, -4.983287811279297, 99.59895324707031, 31.575584411621094, 33.38890838623047, 88.41265869140625, 9.83740234375, 42.03572082519531, 29.179855346679688, 23.37303924560547, 4.079032897949219, 35.83502197265625, 3.5331573486328125, 12.342973709106445, 14.937799453735352, 44.3242073059082, 5.583526611328125, 3.4417800903320312, -0.42130279541015625, 25.051956176757812, -1.4100227355957031, 32.6461296081543, 34.22614288330078, 17.78870391845703, 19.785919189453125, 18.854320526123047, 101.2135009765625, 14.967971801757812, 26.963626861572266, 84.68406677246094, -3.6422557830810547, 7.324848175048828, 29.588768005371094, -8.398529052734375, 2.0202102661132812, 60.50769805908203, 12.708131790161133, 90.62350463867188, 21.147537231445312, 39.53253936767578, 9.583755493164062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000522.npy"} +{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 31.942184448242188, "std": 28.90403938293457, "min": -26.954513549804688, "p10": -1.9853578567504861, "median": 26.256103515625, "p90": 72.3850700378418, "max": 88.40420532226562, "pos_frac": 0.890625, "sample": [17.872406005859375, -3.8478775024414062, 20.337631225585938, 48.53794860839844, 70.2633056640625, 4.8070068359375, 46.0556640625, 88.40420532226562, 16.420303344726562, 40.1746826171875, -16.248716354370117, 55.15094757080078, 2.1013526916503906, 43.49430847167969, 22.12319564819336, 10.31110954284668, 55.96307373046875, 78.84228515625, 34.457088470458984, 11.381660461425781, 1.6184158325195312, 51.222991943359375, 45.6180305480957, 18.281625747680664, 39.39762496948242, 52.24729919433594, 2.716320037841797, 22.02154541015625, 10.464744567871094, 86.22187042236328, 22.426626205444336, 27.637605667114258, 5.62449836730957, -26.954513549804688, 61.47828674316406, 72.55555725097656, -3.4094676971435547, 0.10129737854003906, 66.97959899902344, 15.77659797668457, 35.22248840332031, 13.48497200012207, 21.579864501953125, 0.36438751220703125, 26.927474975585938, -2.879638671875, 67.876220703125, 33.05366516113281, 71.98726654052734, 10.617380142211914, 73.72132873535156, 30.07659149169922, 7.253746032714844, -4.24969482421875, 64.1263427734375, 24.82787322998047, 63.22430419921875, -9.754486083984375, 55.846946716308594, 88.17526245117188, 88.128662109375, 31.740039825439453, 8.735786437988281, 25.584732055664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000523.npy"} +{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 29.516183853149414, "std": 24.76129913330078, "min": -30.669967651367188, "p10": 2.2377637863159183, "median": 27.177433967590332, "p90": 63.0700668334961, "max": 87.40853881835938, "pos_frac": 0.90625, "sample": [11.290702819824219, 81.72240447998047, 36.191131591796875, 30.434158325195312, 20.358726501464844, 21.601844787597656, 8.89471435546875, 1.986703872680664, 18.86731719970703, 9.499637603759766, 37.059425354003906, 12.093154907226562, 26.424224853515625, 71.30252838134766, 59.15666961669922, 36.70539093017578, 20.12432861328125, 20.020950317382812, 39.768310546875, 26.937667846679688, -30.669967651367188, 12.423515319824219, 45.783599853515625, 64.01835632324219, 6.994377136230469, 2.8235702514648438, 39.562156677246094, 51.779701232910156, 52.38744354248047, -22.352405548095703, 22.903057098388672, 5.7494354248046875, 7.699159622192383, 11.402292251586914, 87.40853881835938, -1.547262191772461, 33.002410888671875, 67.99908447265625, 49.70671081542969, 49.091651916503906, 29.55048370361328, -0.911834716796875, -8.143836975097656, -1.6460113525390625, 48.93067169189453, 26.22021484375, 9.123779296875, 9.732872009277344, 40.22196578979492, 46.82206726074219, 65.49343872070312, 44.627838134765625, 24.88995361328125, 8.133392333984375, 12.764541625976562, 38.65013122558594, 31.741287231445312, 82.56492614746094, 27.417200088500977, 60.857391357421875, 45.19847106933594, 30.754135131835938, 58.6668815612793, 10.77032470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000524.npy"} +{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 35.2094841003418, "std": 23.788013458251953, "min": -5.002815246582031, "p10": 7.017336654663087, "median": 31.00523090362549, "p90": 76.61958923339844, "max": 88.61187744140625, "pos_frac": 0.953125, "sample": [29.907413482666016, 25.3641357421875, 28.96148681640625, 31.161117553710938, -3.176126480102539, 61.33103942871094, 58.74254608154297, 43.390037536621094, 80.09185791015625, 39.0642204284668, 6.667205810546875, 13.867786407470703, 5.5664825439453125, 50.256568908691406, 35.67437744140625, 5.894073486328125, 14.146682739257812, 6.031402587890625, 59.261688232421875, 26.802261352539062, 53.64178466796875, 10.703025817871094, 11.649589538574219, 54.603179931640625, 39.14967346191406, 25.48839569091797, 12.395980834960938, 45.388099670410156, 35.59294128417969, 31.314483642578125, 74.2445068359375, -5.002815246582031, 47.894287109375, 23.623661041259766, 53.30787658691406, 43.67205810546875, 16.516590118408203, 53.81256866455078, 19.371978759765625, 27.938186645507812, 77.93177795410156, 25.56402587890625, 20.269821166992188, -2.049955368041992, 26.957504272460938, 34.19459533691406, 38.596405029296875, 15.203372955322266, 37.147159576416016, 29.630638122558594, 71.78121948242188, 77.63748168945312, 11.876426696777344, 23.591293334960938, 88.61187744140625, 30.84934425354004, 34.47564697265625, 19.589149475097656, 13.132377624511719, 32.662193298339844, 79.9976806640625, 78.21546173095703, 7.834308624267578, 85.39485168457031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000525.npy"} +{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 34.531463623046875, "std": 27.9222412109375, "min": -19.695327758789062, "p10": -0.11722240447997345, "median": 31.96575927734375, "p90": 75.35458602905274, "max": 109.80825805664062, "pos_frac": 0.890625, "sample": [96.00753784179688, 89.79115295410156, 19.02224349975586, 13.585060119628906, 42.29460144042969, 10.949722290039062, -9.695215225219727, -3.3235034942626953, 41.95570373535156, -19.695327758789062, 50.717018127441406, 39.63258361816406, 40.172950744628906, 16.27271842956543, 49.77069854736328, 43.87841033935547, 23.99226188659668, 59.59393310546875, 31.181594848632812, 72.58299255371094, 6.974340438842773, 32.74992370605469, 15.84733772277832, 43.66344451904297, 85.42971801757812, 22.16615104675293, 109.80825805664062, 22.391586303710938, 43.382171630859375, 11.487977981567383, -3.156463623046875, 59.03538131713867, 18.62884521484375, 12.346549987792969, -6.47808837890625, 13.744720458984375, 47.488365173339844, 8.490917205810547, 14.119140625, 25.928417205810547, 41.596038818359375, 39.55218505859375, 52.66606140136719, 11.388824462890625, 20.37963104248047, 18.738845825195312, 60.444496154785156, 69.63782501220703, 22.795055389404297, 49.830074310302734, 23.078033447265625, 38.86381530761719, 17.9193115234375, 77.60357666015625, 52.555213928222656, 57.39041519165039, 76.54241180419922, 26.29570770263672, 34.27766418457031, 9.937202453613281, -6.783454895019531, 53.859107971191406, -12.138412475585938, 80.84825897216797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000526.npy"} +{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 32.13435363769531, "std": 28.15311050415039, "min": -17.984817504882812, "p10": 1.0183830261230498, "median": 26.386987686157227, "p90": 70.40259170532227, "max": 110.39907836914062, "pos_frac": 0.890625, "sample": [33.72443389892578, 55.557952880859375, 49.68627166748047, 57.820648193359375, 40.50361633300781, 29.159347534179688, 89.32999420166016, -3.5069007873535156, 29.47430419921875, 26.093326568603516, 36.07850646972656, -2.8073654174804688, 89.66126251220703, 11.51995849609375, 25.812049865722656, 3.9179153442382812, 67.19979095458984, -1.57574462890625, 6.772834777832031, 34.622528076171875, 3.95556640625, 33.920257568359375, 21.815364837646484, 9.211681365966797, 30.05515480041504, 69.75870513916016, -7.403034210205078, 16.117259979248047, 11.542999267578125, 35.70021057128906, 8.384544372558594, 41.86907196044922, 46.199859619140625, 50.99008560180664, 110.39907836914062, 55.71080017089844, 87.734619140625, 13.758779525756836, 24.702896118164062, 9.83807373046875, 62.852203369140625, 18.89128875732422, 99.17973327636719, 24.724334716796875, -11.935592651367188, 28.32994842529297, 31.352394104003906, 19.265167236328125, 18.608551025390625, 5.590789794921875, 39.16834259033203, 15.017974853515625, 22.11972427368164, 23.096946716308594, 26.680648803710938, 23.211776733398438, 92.97511291503906, 29.66248893737793, 21.306419372558594, 46.967281341552734, 13.756988525390625, -17.984817504882812, -0.224273681640625, 70.67854309082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000527.npy"} +{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 30.600479125976562, "std": 25.045381546020508, "min": -9.9874267578125, "p10": 0.1583999633789076, "median": 27.308632850646973, "p90": 65.95662612915041, "max": 95.4383544921875, "pos_frac": 0.890625, "sample": [3.0584716796875, 51.93562316894531, 61.80455780029297, 27.67460823059082, 20.63812255859375, 2.7137451171875, 1.9261932373046875, 69.02778625488281, -9.9874267578125, 45.19068908691406, 25.984359741210938, 25.044960021972656, 26.942657470703125, 16.57571792602539, 35.89222717285156, 37.33234405517578, 31.740272521972656, 29.716140747070312, 67.736083984375, 84.713134765625, -0.7147121429443359, 46.3836669921875, 21.323959350585938, 21.418472290039062, 2.108236312866211, 95.4383544921875, 41.27909851074219, 93.52047729492188, -2.0660552978515625, 41.51941680908203, 10.958549499511719, 24.793533325195312, 18.69635009765625, 1.5425224304199219, 29.269363403320312, 41.60462188720703, -1.2180404663085938, 57.49029541015625, 40.78705596923828, 6.7350616455078125, 26.10332679748535, 43.29682159423828, 17.403701782226562, 37.431549072265625, 42.77100372314453, 22.55213165283203, 36.7705078125, 49.63909912109375, 28.034709930419922, 69.00437927246094, -3.1370849609375, 41.66365051269531, -6.653251647949219, 58.19956970214844, 14.087797164916992, 85.00856018066406, 38.160037994384766, 2.5924415588378906, -0.4347953796386719, 13.317474365234375, 19.348617553710938, 7.031749725341797, 53.48262023925781, 16.22571563720703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000528.npy"} +{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 27.404434204101562, "std": 28.750995635986328, "min": -49.11228942871094, "p10": -3.0843292236328113, "median": 23.463945388793945, "p90": 58.57079963684083, "max": 106.96015167236328, "pos_frac": 0.859375, "sample": [-1.600738525390625, 4.394256591796875, -4.405326843261719, 13.03936767578125, 51.914154052734375, 23.499691009521484, 31.556808471679688, 17.414283752441406, 1.2264785766601562, 106.96015167236328, 18.45247459411621, 20.35495376586914, 20.04534912109375, 32.195579528808594, 37.44682693481445, 8.336599349975586, 37.486534118652344, 50.996673583984375, 13.761482238769531, 6.105949401855469, 13.716575622558594, 28.851394653320312, 100.8612060546875, 70.07234191894531, 35.68376159667969, -25.505149841308594, 23.428199768066406, 41.265480041503906, 15.048072814941406, 35.108360290527344, 35.63536834716797, 1.4837284088134766, 29.333772659301758, 50.02958679199219, 22.671493530273438, -8.864431381225586, 50.883087158203125, 59.5284309387207, 74.34938049316406, 51.629302978515625, 22.430618286132812, 36.846214294433594, -3.72015380859375, -49.11228942871094, 41.39186096191406, -16.69021987915039, 26.782503128051758, 19.372894287109375, 52.43528747558594, 13.659996032714844, 52.767547607421875, 32.02996063232422, 3.991914749145508, 19.772701263427734, 20.766799926757812, -0.09523773193359375, 1.6910934448242188, 97.35784912109375, 52.119659423828125, 56.336326599121094, 1.0880203247070312, -13.860809326171875, 25.89522933959961, 66.23434448242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000529.npy"} +{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 36.536338806152344, "std": 27.414752960205078, "min": -22.098388671875, "p10": -1.9705184936523399, "median": 37.732479095458984, "p90": 71.42420349121095, "max": 100.0181884765625, "pos_frac": 0.890625, "sample": [71.85648345947266, 47.77762985229492, 20.56371307373047, 36.714149475097656, 52.57328796386719, 17.077383041381836, 69.8823471069336, 71.72711944580078, 14.991962432861328, 18.357681274414062, 44.032310485839844, -3.5865249633789062, 42.96722412109375, 32.05108642578125, 70.71739959716797, 42.33134460449219, 15.630189895629883, 50.05388641357422, 33.153541564941406, 40.27056884765625, 24.2314453125, 27.093345642089844, 93.94039916992188, 32.18596649169922, 28.480056762695312, 42.81988525390625, -22.098388671875, 76.60547637939453, 40.01378631591797, 53.61570739746094, 1.8001632690429688, 37.942344665527344, 100.0181884765625, 48.09059143066406, 45.6724853515625, -14.599796295166016, 42.70094299316406, 36.154335021972656, 20.8583984375, -10.593090057373047, 15.191225051879883, 42.763214111328125, 53.295555114746094, 82.22259521484375, -6.818708419799805, 16.84410858154297, 63.54017639160156, -16.644203186035156, 93.95512390136719, 21.82166290283203, 58.86892318725586, 6.336568832397461, 65.93525695800781, -9.498855590820312, 59.1153564453125, 15.481529235839844, 19.895889282226562, 37.522613525390625, 57.17867660522461, 55.99919891357422, 25.003490447998047, 45.99047088623047, 28.55023956298828, 11.70077133178711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000530.npy"} +{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 32.21106719970703, "std": 27.86233901977539, "min": -27.6522216796875, "p10": 1.2423950195312523, "median": 28.706939697265625, "p90": 70.45309066772461, "max": 106.68296813964844, "pos_frac": 0.90625, "sample": [44.04728698730469, -6.845888137817383, 24.46746826171875, 58.59044647216797, 36.389183044433594, -20.335357666015625, 5.977151870727539, 21.934616088867188, 59.63849639892578, 30.925216674804688, 11.971467971801758, 88.26051330566406, 0.3137493133544922, 25.538414001464844, 39.01850128173828, 12.875865936279297, 48.92529296875, 16.26422882080078, 6.4905242919921875, 24.516036987304688, 70.2763442993164, 46.22055435180664, 35.42039489746094, 28.300430297851562, 62.850791931152344, 43.71312713623047, 31.62911605834961, 43.92218017578125, 49.77124786376953, 8.426597595214844, 39.874107360839844, 28.811622619628906, 50.01490020751953, 15.722946166992188, 3.4092350006103516, 19.653076171875, 3.5113143920898438, 27.912841796875, -3.0337181091308594, 62.351341247558594, 28.602256774902344, 106.68296813964844, 70.63509368896484, 4.859100341796875, 90.7366943359375, -27.6522216796875, 34.720855712890625, 24.549850463867188, 75.66561889648438, 53.08311462402344, 27.579818725585938, 86.92403411865234, 70.52883911132812, 24.010883331298828, 48.4167366027832, 4.969608306884766, 41.071510314941406, 5.4031829833984375, -9.007293701171875, 15.402137756347656, 7.806358337402344, 44.8713493347168, 44.52778625488281, -10.601724624633789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000531.npy"} +{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 38.08259582519531, "std": 29.897003173828125, "min": -8.957740783691406, "p10": 2.6689653396606445, "median": 36.68658447265625, "p90": 78.51866760253907, "max": 154.45474243164062, "pos_frac": 0.921875, "sample": [74.22850036621094, 46.843055725097656, 63.39116668701172, 46.650550842285156, 98.34963989257812, 62.04400634765625, 80.5245361328125, 56.594879150390625, -4.122474670410156, 33.959476470947266, 10.673015594482422, 36.780738830566406, 38.14899444580078, 35.333900451660156, 21.164016723632812, 38.20098876953125, 93.02200317382812, -6.769996643066406, 50.170684814453125, 58.10376739501953, 41.79322814941406, 79.37368774414062, 76.52362060546875, 79.48419952392578, 69.84562683105469, 45.299224853515625, 6.274543762207031, 18.908180236816406, 82.50967407226562, -3.8324432373046875, 51.033348083496094, 2.8354110717773438, 10.285888671875, 45.91473388671875, 20.614656448364258, 2.5976314544677734, 42.571861267089844, 29.206268310546875, 47.99256134033203, 33.918731689453125, -8.957740783691406, 63.11981964111328, 3.4482498168945312, 37.249053955078125, 42.24516296386719, 17.853057861328125, 29.25334930419922, 50.40597915649414, 4.465354919433594, -3.2153167724609375, 9.86297607421875, 10.987207412719727, 30.898849487304688, 154.45474243164062, 45.27617645263672, 24.207984924316406, 36.592430114746094, 21.061012268066406, 31.11175537109375, 33.866275787353516, 13.571510314941406, 1.728179931640625, 40.04132080078125, 31.31664276123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000532.npy"} +{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 26.412456512451172, "std": 24.175979614257812, "min": -34.97151184082031, "p10": -2.6069808959960934, "median": 24.64776039123535, "p90": 63.027478027343754, "max": 76.78076934814453, "pos_frac": 0.859375, "sample": [-2.7656097412109375, -9.005350112915039, 76.78076934814453, 27.771469116210938, -0.993408203125, 23.95557403564453, 37.96104431152344, 72.83106994628906, 25.15935516357422, 19.17119598388672, 62.52410888671875, 26.681373596191406, 60.24962615966797, 7.152629852294922, -9.348861694335938, 36.48747253417969, 65.35435485839844, 38.807777404785156, -7.270757675170898, 17.20260238647461, 8.894577026367188, 22.60516357421875, 32.103370666503906, 62.69482421875, 36.532379150390625, 66.6701889038086, 27.015195846557617, -34.97151184082031, 28.392902374267578, 66.11408996582031, 16.936737060546875, 51.137176513671875, 35.475650787353516, 15.449668884277344, 24.772136688232422, 10.738578796386719, 36.478668212890625, 26.088764190673828, 8.878936767578125, 8.225414276123047, 5.221202850341797, 3.222827911376953, 71.13175964355469, 55.60554504394531, -19.2108154296875, 43.03257751464844, 16.10077667236328, -2.236846923828125, 23.17212677001953, 8.0181884765625, 9.996461868286133, 28.731700897216797, 29.915786743164062, 7.5946807861328125, 19.075462341308594, 12.965507507324219, -5.147871017456055, 16.696762084960938, 45.705387115478516, 63.1700439453125, 22.904130935668945, 24.52338409423828, 49.000022888183594, 42.269142150878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000533.npy"} +{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 32.42583465576172, "std": 27.588502883911133, "min": -13.564750671386719, "p10": 2.9154197692871096, "median": 25.40847396850586, "p90": 74.13613815307619, "max": 104.756103515625, "pos_frac": 0.9375, "sample": [35.90369415283203, 45.63658142089844, 2.8851776123046875, 76.07618713378906, 17.89703369140625, 23.091476440429688, 9.98583984375, 66.25714111328125, 18.51009750366211, 29.204923629760742, 8.35622787475586, 48.444549560546875, 2.9859848022460938, 11.495803833007812, 53.976829528808594, 104.756103515625, 49.030059814453125, 45.69352722167969, 45.31275177001953, 83.5997543334961, 7.195146560668945, -2.4904823303222656, 21.321380615234375, 25.93780517578125, 50.21912384033203, 56.66427230834961, 101.84442138671875, 15.520675659179688, 5.24749755859375, 16.73456573486328, 14.384653091430664, 42.05938720703125, 12.379362106323242, 18.56079864501953, 19.051868438720703, -13.564750671386719, 8.903579711914062, 69.4654541015625, 87.14856719970703, 71.1523208618164, 39.626922607421875, 25.287948608398438, 20.391626358032227, 32.36701965332031, 84.82392883300781, 25.52899932861328, 30.159366607666016, 2.3944053649902344, 12.09893798828125, 64.88374328613281, 75.4149169921875, -5.399290084838867, 28.336044311523438, 28.13985824584961, 41.006229400634766, 15.97052001953125, 4.728752136230469, 22.812175750732422, 48.64522933959961, 5.304231643676758, 51.62034606933594, -1.8844985961914062, 0.30666351318359375, 15.853973388671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000534.npy"} +{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 35.92974090576172, "std": 27.33642578125, "min": -18.123825073242188, "p10": 3.8485309600830084, "median": 36.855831146240234, "p90": 70.82077484130859, "max": 107.7199935913086, "pos_frac": 0.921875, "sample": [3.6503067016601562, 42.636474609375, 23.866607666015625, 9.488265991210938, 23.845489501953125, 13.161308288574219, 36.77162170410156, 95.62340545654297, 41.28668212890625, 14.554412841796875, 16.672168731689453, 57.76005554199219, 96.75413513183594, 82.87493133544922, 23.175094604492188, -3.7380104064941406, -8.8367919921875, 71.09112548828125, 36.44209289550781, 39.7752685546875, 73.72750091552734, 39.519439697265625, 11.687690734863281, 55.82115173339844, 51.775604248046875, 65.97765350341797, 27.624418258666992, 50.21654510498047, 7.259500503540039, 45.343414306640625, 51.45880126953125, 30.83940887451172, 55.00798034667969, 70.18995666503906, 41.009437561035156, 3.1558761596679688, 65.7889633178711, 44.88360595703125, 15.94122314453125, 41.59717559814453, 40.89717102050781, 81.49357604980469, 28.772998809814453, 64.6003189086914, 38.38934326171875, 47.83696746826172, 30.059364318847656, 61.030487060546875, 8.351211547851562, -14.600906372070312, 41.08202362060547, 21.20221710205078, 18.411231994628906, 4.311054229736328, 8.377105712890625, -18.123825073242188, 47.34960174560547, -0.6431808471679688, 107.7199935913086, 21.403413772583008, 4.332630157470703, 36.940040588378906, 32.56575012207031, 22.064743041992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000535.npy"} +{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 40.71318817138672, "std": 27.575429916381836, "min": -5.480587005615234, "p10": 11.2330623626709, "median": 33.35405921936035, "p90": 77.01202926635743, "max": 114.6337890625, "pos_frac": 0.96875, "sample": [30.313512802124023, 0.7311630249023438, 45.749351501464844, 59.66990661621094, 109.31559753417969, 50.77240753173828, 27.246078491210938, 21.808807373046875, -5.480587005615234, 27.13054656982422, 15.691238403320312, 63.31812286376953, 20.93307113647461, 77.22793579101562, 22.199451446533203, 76.50824737548828, 72.64897155761719, 10.599641799926758, 78.8022232055664, 69.82173156738281, 26.929039001464844, 18.060501098632812, 29.800819396972656, 26.767932891845703, 71.8524398803711, 104.21029663085938, 9.615795135498047, 55.47593688964844, 49.908531188964844, -3.138580322265625, 114.6337890625, 59.23161315917969, 61.781890869140625, 43.22020721435547, 39.9696044921875, 25.93508529663086, 24.18402099609375, 97.36709594726562, 12.470989227294922, 10.702522277832031, 28.947769165039062, 17.756378173828125, 43.294898986816406, 32.053253173828125, 37.66725158691406, 6.308134078979492, 15.750350952148438, 35.63766860961914, 34.51679992675781, 47.753753662109375, 23.54520034790039, 13.339580535888672, 18.671035766601562, 48.551979064941406, 42.43849182128906, 86.88600158691406, 21.614479064941406, 32.53109359741211, 14.207881927490234, 29.85645294189453, 74.97969055175781, 58.46092987060547, 34.177024841308594, 52.71092224121094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000536.npy"} +{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 33.64003372192383, "std": 28.76498794555664, "min": -11.466026306152344, "p10": 0.9455032348632819, "median": 24.396183013916016, "p90": 73.92685623168946, "max": 104.32255554199219, "pos_frac": 0.921875, "sample": [-6.627662658691406, 27.498733520507812, 0.4426593780517578, -7.337984085083008, 8.599929809570312, 36.508514404296875, 12.063398361206055, 72.08287811279297, 20.799148559570312, 23.875019073486328, 45.495269775390625, 0.6831817626953125, 27.546127319335938, 9.07568359375, -11.466026306152344, 44.2125244140625, 85.932373046875, 2.289487838745117, 58.63697052001953, 11.919754028320312, 4.86723518371582, 67.28589630126953, 24.48308563232422, 8.745399475097656, 16.68524169921875, 17.822898864746094, 84.76567077636719, 28.165138244628906, 16.628917694091797, 26.002351760864258, 74.71713256835938, 57.64167785644531, 104.32255554199219, 14.979019165039062, 66.1947250366211, 71.01619720458984, 5.650690078735352, 66.43203735351562, 44.671043395996094, 79.26109313964844, 1.557586669921875, 80.79141235351562, 24.309280395507812, -6.57342529296875, 46.70125198364258, 24.119796752929688, 17.78281021118164, 16.499988555908203, 22.12360382080078, 93.00221252441406, 17.39771270751953, 63.76561737060547, 6.508995056152344, 22.03272247314453, 71.37174987792969, 13.554443359375, 60.15190124511719, 18.441205978393555, 39.574676513671875, 35.84727096557617, 31.921390533447266, 65.29815673828125, -0.20119667053222656, 44.415016174316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000537.npy"} +{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 32.44498062133789, "std": 25.140748977661133, "min": -15.597671508789062, "p10": 4.549973297119142, "median": 29.616347312927246, "p90": 67.47978897094728, "max": 111.19149780273438, "pos_frac": 0.953125, "sample": [44.22398376464844, 13.866546630859375, 16.06127166748047, 111.19149780273438, 72.41499328613281, 2.3616943359375, 33.054141998291016, -14.937461853027344, 31.196571350097656, 6.088203430175781, 40.51868438720703, 20.099517822265625, 58.866600036621094, 23.83740234375, 25.037220001220703, 23.970611572265625, 31.319900512695312, 16.73265838623047, 11.299957275390625, 78.25968933105469, 30.338088989257812, -1.2337417602539062, 23.253433227539062, 63.24082946777344, 39.51457595825195, 19.680419921875, 25.046295166015625, 91.67692565917969, 12.028411865234375, 15.148780822753906, 19.043716430664062, 79.37596893310547, 37.35023880004883, -15.597671508789062, 25.676055908203125, 29.08599281311035, 0.2104473114013672, 62.130126953125, 32.26249694824219, 22.985610961914062, 25.14177131652832, 17.96868133544922, 15.28536605834961, 59.72735595703125, 69.20703887939453, 84.32798767089844, 30.14670181274414, 45.81254577636719, 9.65019416809082, 12.574256896972656, 38.372467041015625, 40.91889572143555, 37.04413604736328, 10.914241790771484, 31.311891555786133, 24.720664978027344, 63.44953918457031, 31.117084503173828, 62.229339599609375, 38.554412841796875, 30.28466796875, 3.8907318115234375, 34.545806884765625, 2.602384567260742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000538.npy"} +{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 31.052791595458984, "std": 27.054018020629883, "min": -23.92156219482422, "p10": 2.5589544296264655, "median": 25.837346076965332, "p90": 66.61943359375, "max": 105.99625396728516, "pos_frac": 0.90625, "sample": [56.409339904785156, 43.720733642578125, 3.0064773559570312, 11.501852035522461, 11.037487030029297, 48.23390197753906, -11.045583724975586, 36.440765380859375, 6.2935791015625, 32.228172302246094, 21.546615600585938, 2.367158889770508, 15.301361083984375, 29.357742309570312, 65.21063232421875, 30.504196166992188, 63.59192657470703, 30.567245483398438, 20.6702880859375, 84.76534271240234, 20.897781372070312, 26.7406005859375, 81.20069885253906, 65.35626220703125, 26.35123634338379, -23.92156219482422, 20.31885528564453, 67.46067810058594, 31.484689712524414, 7.8818511962890625, 4.858642578125, 9.557518005371094, 14.538909912109375, 76.970947265625, 21.569183349609375, 10.426048278808594, 13.410247802734375, 58.931732177734375, 19.909324645996094, 6.927055358886719, -8.818981170654297, 105.99625396728516, 4.459228515625, 55.1771354675293, 65.62303161621094, -1.7929401397705078, 66.58604431152344, 25.323455810546875, 50.42359924316406, -14.482994079589844, 22.907085418701172, -2.7170944213867188, 72.87860107421875, 43.84925842285156, 59.239990234375, 21.581069946289062, 66.63374328613281, 12.68951416015625, 35.09916687011719, 35.64848327636719, 17.563146591186523, 44.45440673828125, 28.668357849121094, 17.80902671813965], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000539.npy"} +{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 35.191566467285156, "std": 28.06586265563965, "min": -22.67938232421875, "p10": 3.350523376464848, "median": 32.72922897338867, "p90": 68.78761978149414, "max": 100.15664672851562, "pos_frac": 0.90625, "sample": [32.70149230957031, 32.75696563720703, 73.88893127441406, 28.41002655029297, 49.71864318847656, 15.885337829589844, 24.26031494140625, -14.109954833984375, -22.67938232421875, -16.997344970703125, -18.734956741333008, 40.04216003417969, 25.916885375976562, 27.725616455078125, 66.16903686523438, 67.32860565185547, 36.454856872558594, 10.276641845703125, 32.61058044433594, 99.67068481445312, 33.463958740234375, 15.518592834472656, 27.998374938964844, 66.36007690429688, 42.00422668457031, 37.328399658203125, 53.79393005371094, 97.41559600830078, 50.12868881225586, 42.686676025390625, 23.58917236328125, 54.45295333862305, 11.522912979125977, 21.814796447753906, 11.283515930175781, 37.18767547607422, 18.428123474121094, 11.953689575195312, 68.66661834716797, 47.30719757080078, 7.567047119140625, 89.48265075683594, 59.75132751464844, 100.15664672851562, -0.8368949890136719, 70.4180908203125, 20.83111572265625, 61.815399169921875, 38.694496154785156, 33.688720703125, 8.760498046875, 39.18528747558594, 63.917327880859375, 20.29559326171875, 18.820106506347656, 20.73284912109375, 57.012054443359375, 20.241472244262695, 49.906494140625, 15.241832733154297, -10.646507263183594, 68.8394775390625, 32.641456604003906, 1.5434417724609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000540.npy"} +{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 30.185348510742188, "std": 24.248565673828125, "min": -15.55767822265625, "p10": -0.053060913085936356, "median": 26.09178924560547, "p90": 63.64888992309572, "max": 88.49957275390625, "pos_frac": 0.890625, "sample": [65.48184204101562, 40.743812561035156, 46.312828063964844, 66.20816040039062, 14.82080078125, 20.514511108398438, 40.93858337402344, 18.486087799072266, 28.498153686523438, 14.35772705078125, -4.073150634765625, 87.73865509033203, -11.129341125488281, 39.79731750488281, 23.541473388671875, 31.250411987304688, 41.607879638671875, 88.49957275390625, 28.86614227294922, 10.612869262695312, 84.61431884765625, 59.37200164794922, 24.532699584960938, 10.572097778320312, 25.972259521484375, 11.441154479980469, -10.886756896972656, 10.384529113769531, -9.165611267089844, 80.55709838867188, 21.057403564453125, 21.593460083007812, 29.665252685546875, 17.112991333007812, 1.0886154174804688, 29.343223571777344, 34.60917663574219, 46.8374137878418, 54.762062072753906, 10.743270874023438, 14.18048095703125, 35.83576202392578, 17.338760375976562, 55.45426940917969, 21.229721069335938, 70.79302215576172, 30.25869369506836, 16.09893798828125, 47.89363098144531, 43.6866455078125, -0.5423507690429688, 39.428192138671875, 15.03570556640625, 55.95393371582031, 54.521240234375, 24.396976470947266, -0.821044921875, 22.95410919189453, -15.55767822265625, 15.298370361328125, 11.12237548828125, 26.211318969726562, 28.312103271484375, 55.4981803894043], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000541.npy"} +{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 34.35010528564453, "std": 25.037315368652344, "min": -16.164527893066406, "p10": 1.9164722442626965, "median": 34.52328109741211, "p90": 71.85996322631836, "max": 88.80181884765625, "pos_frac": 0.9375, "sample": [25.74597930908203, 45.83019256591797, 75.97552490234375, 20.481788635253906, 0.7224349975585938, 81.22218322753906, 22.801145553588867, 71.259033203125, 76.0982666015625, 12.23004150390625, 61.592613220214844, 81.45332336425781, 31.988208770751953, 1.4343109130859375, 40.6661376953125, 25.920324325561523, 88.80181884765625, 56.69621276855469, 0.408416748046875, 28.73413848876953, 9.815444946289062, 35.250396728515625, 33.796165466308594, 24.602802276611328, 9.2352294921875, 44.6048583984375, 21.013168334960938, 41.148597717285156, 4.06939697265625, 54.882423400878906, 5.615814208984375, 47.7139892578125, 71.71238708496094, -2.8453826904296875, 52.804595947265625, 3.332172393798828, 40.945777893066406, 30.963890075683594, 3.041515350341797, 37.04716873168945, 24.83255386352539, 44.825408935546875, 22.24614906311035, -16.164527893066406, 31.361618041992188, 39.54107666015625, -4.17448616027832, 47.425994873046875, 73.75437927246094, 10.923524856567383, 41.47478103637695, 62.69877243041992, 26.235244750976562, 41.5941047668457, 24.65320587158203, 35.5037841796875, 6.99818229675293, -5.835615158081055, 71.92321014404297, 41.606773376464844, 17.158830642700195, 35.65245056152344, 37.795066833496094, 67.56369018554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000542.npy"} +{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 25.586671829223633, "std": 25.67181396484375, "min": -24.753982543945312, "p10": -2.599276733398434, "median": 21.897674560546875, "p90": 57.759983062744155, "max": 109.81005859375, "pos_frac": 0.890625, "sample": [7.517768859863281, -5.558265686035156, -6.179679870605469, 15.142776489257812, 27.89271354675293, 72.55261993408203, 1.5431632995605469, 20.203140258789062, 3.5451507568359375, 47.778968811035156, 9.21624755859375, 5.81683349609375, 4.567432403564453, 17.77103042602539, 54.69617462158203, 43.94823455810547, 39.729766845703125, 101.99784851074219, -5.792171478271484, 1.0398025512695312, 29.114761352539062, 24.08759307861328, 14.451427459716797, 13.606435775756836, -24.753982543945312, 19.45415496826172, 11.03912353515625, 70.87643432617188, 39.443443298339844, 26.618072509765625, 33.8216552734375, 15.409591674804688, -8.425994873046875, 61.39507293701172, 38.03656005859375, -4.158882141113281, 109.81005859375, 35.39007568359375, 59.07304382324219, 10.298965454101562, 48.555450439453125, 26.404190063476562, 18.843563079833984, 14.571144104003906, 26.8255615234375, 13.818107604980469, 35.333744049072266, 69.265380859375, -24.327110290527344, 1.4760894775390625, 29.10411834716797, 16.687049865722656, 44.58113479614258, 28.59178924560547, 36.32971954345703, 24.385887145996094, 23.592208862304688, 31.65100860595703, 3.7185516357421875, 19.174087524414062, 7.888208389282227, 42.174644470214844, 14.341573715209961, 52.543701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000543.npy"} +{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 32.87367248535156, "std": 23.992013931274414, "min": -16.739727020263672, "p10": 5.802971076965332, "median": 31.42905616760254, "p90": 59.252587890625, "max": 124.5787353515625, "pos_frac": 0.984375, "sample": [66.50517272949219, 22.608489990234375, 47.83511734008789, 45.927490234375, 124.5787353515625, 2.5254058837890625, 43.18980407714844, 24.716556549072266, 47.4545783996582, 6.054681777954102, 91.43521118164062, 10.261213302612305, 3.3886260986328125, 34.74676513671875, 30.044227600097656, 28.380939483642578, 15.076713562011719, 24.37744140625, 2.120361328125, 59.72343444824219, 11.06594467163086, 51.66602325439453, 12.571489334106445, 17.81616973876953, 45.98480987548828, 13.706535339355469, 39.48457336425781, 31.531787872314453, 19.69385528564453, 17.014535903930664, 58.15394592285156, 44.38923645019531, 33.91559600830078, 18.10796356201172, 34.6861572265625, 44.36857223510742, 20.001693725585938, 17.851835250854492, 23.44515609741211, 4.94694709777832, 28.596965789794922, 38.797489166259766, 45.274391174316406, 60.21722412109375, 49.201995849609375, 38.02132034301758, 45.8641357421875, 42.672019958496094, 14.179420471191406, 31.412723541259766, 6.242366790771484, 31.445388793945312, 30.139053344726562, 73.6917724609375, 58.01771545410156, 5.695095062255859, 42.69678497314453, 4.3859405517578125, 48.23207092285156, 9.830772399902344, 33.59619140625, 83.84347534179688, -16.739727020263672, 7.24647331237793], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000544.npy"} +{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 33.994972229003906, "std": 26.682092666625977, "min": -11.536384582519531, "p10": 2.5105293273925784, "median": 33.33341026306152, "p90": 64.93659896850586, "max": 114.582763671875, "pos_frac": 0.9375, "sample": [0.8010311126708984, -3.871337890625, -1.9139404296875, -11.536384582519531, 5.953090667724609, 55.25334548950195, 64.94009399414062, 6.670049667358398, 61.682373046875, 21.53089141845703, 32.6534538269043, 39.813499450683594, -2.5295333862304688, 114.582763671875, 65.30868530273438, 2.4387130737304688, 40.509674072265625, 79.60137176513672, 40.98596954345703, 44.04677963256836, 84.27633666992188, 39.4493293762207, 63.51728820800781, 4.199745178222656, 25.752395629882812, 5.934116363525391, 2.6781005859375, 45.79998779296875, 24.961959838867188, 4.676206588745117, 39.79405212402344, 29.798377990722656, 10.16731071472168, 19.803062438964844, 34.1630859375, 60.065513610839844, 49.77886962890625, 19.604957580566406, 14.33681869506836, 60.725616455078125, 7.332221984863281, 62.743553161621094, 71.6892318725586, 32.67189407348633, 6.207996368408203, 52.15132141113281, 36.641212463378906, 64.9284439086914, 64.04866027832031, 35.30126190185547, 11.892881393432617, 14.759628295898438, 18.075435638427734, 47.575416564941406, 49.33441162109375, 24.730117797851562, 83.65023803710938, 0.576385498046875, 52.7994499206543, 33.99492645263672, 29.726638793945312, 15.65069580078125, 3.6044235229492188, 59.18788146972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000545.npy"} +{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 33.39392852783203, "std": 23.92932891845703, "min": -21.62310791015625, "p10": 4.795319747924806, "median": 28.33156108856201, "p90": 71.48172073364259, "max": 82.4500732421875, "pos_frac": 0.953125, "sample": [15.079681396484375, 14.552513122558594, 78.19349670410156, 24.853797912597656, 36.240867614746094, 46.7031364440918, 4.322254180908203, 27.961454391479492, 41.49315643310547, 20.89093017578125, 21.824710845947266, 10.568351745605469, 17.951934814453125, 41.06153106689453, 36.91754150390625, 17.833553314208984, 31.40186309814453, 43.308494567871094, 41.166160583496094, 3.6982955932617188, 5.899139404296875, 76.54070281982422, -4.543785095214844, 73.63453674316406, 32.82884216308594, 15.010360717773438, 56.91987991333008, 33.75254821777344, 81.49784088134766, 22.49634552001953, 7.0110626220703125, 11.856491088867188, 26.20693588256836, 55.77913284301758, 28.70166778564453, 2.2590179443359375, 58.50156784057617, 16.75390625, 69.59215545654297, 82.4500732421875, -21.62310791015625, 50.950836181640625, 63.4259033203125, 24.221717834472656, 35.27630615234375, 59.15608215332031, -2.5178489685058594, 72.29153442382812, 49.500213623046875, 36.90573501586914, 22.652137756347656, 53.61293029785156, 27.295467376708984, 77.29606628417969, 25.841989517211914, 42.51581573486328, 18.884315490722656, 3.4581336975097656, 8.221134185791016, 25.177902221679688, 14.53892707824707, 63.47605895996094, 39.023048400878906, 18.457870483398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000546.npy"} +{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 30.772075653076172, "std": 30.483257293701172, "min": -37.167205810546875, "p10": -1.0118959426879854, "median": 29.83831787109375, "p90": 61.587267303466795, "max": 124.95426940917969, "pos_frac": 0.890625, "sample": [2.175027847290039, 53.06367492675781, -37.167205810546875, 104.92915344238281, 9.777111053466797, 12.902984619140625, 47.04048156738281, 21.687110900878906, 10.14166259765625, 61.092254638671875, 26.584503173828125, 119.64968872070312, 34.630126953125, 124.95426940917969, 36.18603515625, -12.136039733886719, 72.58563232421875, 34.260658264160156, -13.203544616699219, 13.211250305175781, 18.469078063964844, 16.96912384033203, 61.799415588378906, 40.99213790893555, 30.247314453125, 10.780773162841797, 74.8111572265625, 56.38539123535156, 35.180908203125, 33.710975646972656, 84.92926788330078, 34.91565704345703, 5.1950225830078125, 55.534568786621094, 24.056209564208984, 5.755941390991211, 4.685035705566406, 1.754852294921875, 29.4293212890625, -8.187286376953125, 41.95697021484375, 42.55064392089844, 43.57783126831055, 27.827590942382812, 28.358009338378906, 33.8408088684082, 6.155803680419922, 58.154541015625, 5.188650131225586, 55.71806335449219, 23.727767944335938, 6.9276123046875, 40.56224060058594, 36.92570495605469, 3.257291793823242, 30.739852905273438, 39.07025146484375, 18.0440673828125, 52.800254821777344, -4.9204864501953125, 49.19349670410156, -12.11822509765625, 4.292093276977539, -2.1976451873779297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000547.npy"} +{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 34.18475341796875, "std": 28.600631713867188, "min": -19.029037475585938, "p10": 0.1901634216308598, "median": 32.903873443603516, "p90": 66.87145919799806, "max": 131.13461303710938, "pos_frac": 0.90625, "sample": [45.61798095703125, 30.672943115234375, 23.298080444335938, 37.98969650268555, 35.39631271362305, 0.6137619018554688, -13.173660278320312, 107.61024475097656, 63.52149200439453, 43.398048400878906, 41.89598846435547, 23.20539093017578, 32.015602111816406, 60.6612548828125, 47.5472412109375, -19.029037475585938, 32.26862335205078, 59.134521484375, 40.949546813964844, 14.210577011108398, 48.81835174560547, 83.65248107910156, 46.82183837890625, 22.905715942382812, 38.20994567871094, 7.805091857910156, 38.976348876953125, 11.681665420532227, 35.970497131347656, 30.419357299804688, -1.2583770751953125, 131.13461303710938, 51.786407470703125, 26.752044677734375, 35.00614929199219, -2.3835887908935547, 6.0411224365234375, 68.93998718261719, 2.965057373046875, 24.518447875976562, 39.66659164428711, -4.321414947509766, 5.925689697265625, 26.90778350830078, 6.239095687866211, 10.828872680664062, 16.685989379882812, 13.645294189453125, 40.232261657714844, -10.096328735351562, 46.87391662597656, 68.30715942382812, 0.0086212158203125, 103.3177490234375, 25.559066772460938, 28.59915542602539, 35.18074035644531, 81.96029663085938, 30.34716796875, 51.013954162597656, 11.849128723144531, 33.53912353515625, 60.28289031982422, 48.70368576049805], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000548.npy"} +{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 34.86151123046875, "std": 25.489124298095703, "min": -18.564903259277344, "p10": 4.038794708251956, "median": 32.99834442138672, "p90": 65.66659469604492, "max": 111.13858032226562, "pos_frac": 0.921875, "sample": [32.84370422363281, 111.13858032226562, 22.024703979492188, 2.762451171875, 59.64579772949219, 74.16119384765625, 33.31620788574219, 67.67070770263672, 51.98634338378906, 46.10649871826172, -1.7220191955566406, 33.17811584472656, 38.03430938720703, 16.380874633789062, 15.777580261230469, 9.96466064453125, 52.679710388183594, -1.9837589263916016, -1.104583740234375, 38.00633239746094, 44.628379821777344, 29.600507736206055, 0.15621185302734375, 59.967620849609375, 46.32416915893555, 20.652729034423828, 23.06128692626953, 7.016929626464844, 59.11322784423828, 41.439697265625, 9.926193237304688, 9.587047576904297, 46.691062927246094, 38.76014709472656, 28.21752166748047, 29.192344665527344, 7.233526229858398, 66.38616943359375, 17.828285217285156, 18.418941497802734, -0.30454254150390625, 29.569164276123047, 63.987586975097656, 32.333683013916016, 104.92596435546875, 37.79817199707031, 31.725296020507812, 57.67111587524414, 49.412200927734375, 53.34686279296875, 47.2646484375, -18.564903259277344, 17.618322372436523, 29.112716674804688, 55.313232421875, 11.566509246826172, 33.658912658691406, 23.420307159423828, 33.152984619140625, 61.932518005371094, 7.14898681640625, 68.34477233886719, 15.483642578125, 80.1491928100586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000549.npy"} +{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 27.775787353515625, "std": 24.132396697998047, "min": -16.012706756591797, "p10": -1.8939859390258769, "median": 27.042296409606934, "p90": 63.41676292419434, "max": 86.10997772216797, "pos_frac": 0.890625, "sample": [29.80660057067871, 28.283658981323242, 23.923446655273438, 10.148880004882812, 15.67315673828125, 61.506988525390625, 30.091625213623047, 43.32746887207031, 16.34000015258789, 16.507606506347656, 62.24753189086914, 20.68408203125, 74.70690155029297, 33.6294059753418, 14.598983764648438, 14.004554748535156, 45.43895721435547, -16.012706756591797, 20.134246826171875, 38.6733283996582, -13.547210693359375, 4.754539489746094, 70.55899047851562, 16.906339645385742, 2.3245506286621094, 30.7919921875, 8.482498168945312, 48.35679244995117, 42.986454010009766, 25.800933837890625, -4.990264892578125, 17.50799560546875, 28.31922149658203, 1.0043563842773438, 36.39863586425781, 75.12651824951172, 40.57893371582031, 33.64130401611328, 86.10997772216797, 14.540088653564453, 48.05177688598633, 29.098472595214844, 6.561130523681641, -7.0459747314453125, 36.89198303222656, 40.20953369140625, 41.55126953125, 63.91786193847656, 3.6005401611328125, 43.83281326293945, -2.8059616088867188, 69.06498718261719, 0.23395729064941406, 48.03662109375, 2.6533889770507812, 16.333236694335938, 37.79798889160156, 59.91339111328125, 24.17253875732422, 1.5245361328125, -6.4642333984375, -3.097198486328125, 66.1610107421875, 8.089263916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000550.npy"} +{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 35.135276794433594, "std": 26.755786895751953, "min": -9.91290283203125, "p10": 3.493674659729005, "median": 33.429561614990234, "p90": 72.58399429321292, "max": 103.28915405273438, "pos_frac": 0.9375, "sample": [22.30221939086914, 27.93878936767578, 39.63220977783203, 10.450035095214844, 52.683258056640625, 92.93846893310547, -3.797943115234375, 60.08882522583008, 43.00921630859375, 33.019493103027344, 36.01988220214844, 34.09882354736328, 2.7948455810546875, 85.77253723144531, 58.8712158203125, 60.281707763671875, 65.56343841552734, -7.4368896484375, 28.88408660888672, 84.037353515625, 25.198883056640625, 55.47758483886719, 62.18406677246094, 103.28915405273438, 41.246856689453125, 9.843032836914062, 75.59280395507812, 48.53614807128906, -3.8112926483154297, 64.81124877929688, 39.251502990722656, 7.630794525146484, 38.677886962890625, 23.998626708984375, 35.01081848144531, 38.49923324584961, 31.439529418945312, 33.839630126953125, 35.14935302734375, 1.0826396942138672, 8.309783935546875, 4.6519775390625, 16.300155639648438, 18.502227783203125, 8.89516830444336, 92.48091125488281, 10.794952392578125, 5.273616790771484, 65.13197326660156, 2.9972591400146484, 8.087455749511719, 25.970993041992188, 52.972816467285156, 26.168289184570312, 26.33061981201172, -9.91290283203125, 15.288963317871094, 35.49842071533203, 52.23723602294922, 25.819358825683594, 25.08028793334961, 81.34125518798828, 37.150054931640625, 19.18668556213379], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000551.npy"} +{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 39.607852935791016, "std": 31.18678092956543, "min": -5.563259124755859, "p10": 9.769074821472168, "median": 31.293537139892578, "p90": 72.88284912109376, "max": 152.21182250976562, "pos_frac": 0.96875, "sample": [27.160369873046875, 34.91820526123047, 9.663818359375, 73.44393920898438, 26.023414611816406, 90.34991455078125, 56.260986328125, 25.98980712890625, 39.742210388183594, 29.894088745117188, 60.038177490234375, 63.744171142578125, -5.563259124755859, 36.426666259765625, 71.57363891601562, 65.4028549194336, 43.162193298339844, 91.09326934814453, 134.3990020751953, 11.091148376464844, 19.010093688964844, 74.78755187988281, 40.015872955322266, 29.134567260742188, 7.369270324707031, 10.567150115966797, 13.348724365234375, 43.116249084472656, 0.10970115661621094, 13.063362121582031, 61.25775146484375, 7.588623046875, 31.80316162109375, 25.22644805908203, 22.320457458496094, 23.409446716308594, 59.40556335449219, 55.88927459716797, 152.21182250976562, 41.89508056640625, 42.083229064941406, 31.411399841308594, 43.53141784667969, 64.89720153808594, 67.27288818359375, 118.03335571289062, 28.406753540039062, 11.007293701171875, 63.0086669921875, 59.50054931640625, 9.629913330078125, 13.454761505126953, 13.141227722167969, 26.856403350830078, 15.215805053710938, 31.175674438476562, -3.4855194091796875, 35.17103576660156, 10.014673233032227, 15.390411376953125, 25.71259307861328, 27.183395385742188, 55.64649963378906, 14.298149108886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000552.npy"} +{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 30.661273956298828, "std": 25.554927825927734, "min": -13.285682678222656, "p10": 1.5409591674804697, "median": 26.372347831726074, "p90": 68.4598976135254, "max": 102.76478576660156, "pos_frac": 0.921875, "sample": [100.21258544921875, 58.67072296142578, 25.77642822265625, 25.932281494140625, 27.136383056640625, 6.418663024902344, 18.314422607421875, 0.6119613647460938, 24.500661849975586, 14.835931777954102, 19.592514038085938, 17.15515899658203, 38.43480682373047, 19.76523208618164, -2.5882720947265625, 28.14405059814453, 7.165904998779297, 8.69366455078125, 65.21720123291016, -6.686716079711914, 13.718994140625, 44.174713134765625, 36.721588134765625, 5.242059707641602, 21.28614044189453, 44.63447570800781, 90.29873657226562, 9.549369812011719, 27.091697692871094, 59.35613250732422, -5.6136627197265625, 40.114261627197266, 70.65791320800781, -3.7234344482421875, 64.18419647216797, 20.197113037109375, 36.413726806640625, 30.13311767578125, 40.99774169921875, 1.1420135498046875, 33.550201416015625, 30.433555603027344, 26.812414169311523, 20.690460205078125, 2.471832275390625, 3.0839767456054688, 23.619354248046875, 11.104728698730469, 69.84962463378906, 36.12556076049805, 73.82837677001953, 18.01651382446289, 41.2847900390625, 47.41693115234375, 73.14669799804688, 102.76478576660156, 39.44098663330078, 54.646240234375, 29.34864044189453, 20.916183471679688, -13.285682678222656, 12.806652069091797, 21.19249725341797, 39.17579650878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000553.npy"} +{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 41.98722839355469, "std": 31.148805618286133, "min": -36.22331237792969, "p10": -0.13457412719726558, "median": 43.11760330200195, "p90": 82.4868522644043, "max": 103.73165893554688, "pos_frac": 0.875, "sample": [20.703109741210938, 30.831405639648438, 36.40701675415039, 5.317108154296875, 74.14825439453125, 74.784912109375, 82.93196105957031, 86.6209945678711, 32.428741455078125, 32.004844665527344, 70.35163879394531, 31.581283569335938, 65.78009033203125, 57.001129150390625, 11.333961486816406, 53.414794921875, 42.18426513671875, -2.8727264404296875, 53.456031799316406, 46.62933349609375, 30.15459632873535, -8.652231216430664, 17.56303596496582, 22.51251983642578, 25.268600463867188, -0.15244293212890625, 59.720314025878906, 44.613677978515625, -6.430335998535156, 81.4482650756836, 61.03160858154297, 20.098480224609375, 68.154541015625, 35.12202453613281, 62.48811721801758, 27.39521026611328, 90.35488891601562, 37.52621078491211, 93.73084259033203, -6.736324310302734, 56.21461486816406, 14.796836853027344, 50.235145568847656, 71.48329162597656, 99.49235534667969, 19.783283233642578, 61.99394226074219, 24.128128051757812, 61.191951751708984, 56.58355712890625, 3.905364990234375, 67.09500122070312, 44.050941467285156, -36.22331237792969, 41.01123809814453, -0.0928802490234375, -10.894561767578125, 15.957441329956055, 72.55169677734375, 52.31431579589844, 103.73165893554688, 100.9422378540039, 44.57886505126953, 12.101879119873047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000554.npy"} +{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 34.768707275390625, "std": 28.32597541809082, "min": -18.03848648071289, "p10": -3.336881256103511, "median": 34.6026496887207, "p90": 72.52934036254884, "max": 112.2755126953125, "pos_frac": 0.890625, "sample": [37.69239807128906, 34.86283874511719, 42.49775314331055, 48.868255615234375, 34.460540771484375, 83.37469482421875, 29.69775390625, 28.820762634277344, -15.825305938720703, 17.775283813476562, -8.77154541015625, 74.0626449584961, 36.362213134765625, 54.18617248535156, -11.580612182617188, 5.640161514282227, 10.096580505371094, 15.479873657226562, 112.2755126953125, 93.07534790039062, 20.450668334960938, 95.58177185058594, 43.620086669921875, 68.30618286132812, 37.21397399902344, 12.726730346679688, 50.850791931152344, 30.386741638183594, 52.298255920410156, -8.041748046875, 20.034683227539062, 47.1373291015625, 34.74475860595703, 42.11058044433594, 12.778533935546875, 39.18642807006836, 36.67954635620117, 15.232961654663086, 32.04948425292969, 17.51348876953125, -18.03848648071289, 23.235530853271484, 50.11761474609375, 68.95162963867188, 45.414405822753906, -7.587642669677734, 75.66506958007812, 36.169708251953125, 43.74749755859375, -5.232563018798828, 67.50123596191406, 25.6229248046875, 54.94706726074219, 26.094072341918945, 12.633014678955078, 24.162931442260742, 98.40164184570312, 7.431755065917969, 46.476402282714844, 31.725112915039062, 52.63922882080078, 32.646820068359375, 1.0863761901855469, 7.473300933837891], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000555.npy"} +{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 30.53741455078125, "std": 27.87470054626465, "min": -15.957328796386719, "p10": -3.0588584899902327, "median": 27.80112075805664, "p90": 65.71395874023439, "max": 102.3050537109375, "pos_frac": 0.84375, "sample": [67.410888671875, 1.8830184936523438, 23.707847595214844, 22.41180419921875, 30.500137329101562, 16.857032775878906, 51.495582580566406, 26.57550811767578, -9.828033447265625, 42.07044982910156, -8.137689590454102, 50.776493072509766, 61.75445556640625, 29.988868713378906, 22.50316619873047, 39.40790939331055, 87.63065338134766, 41.17230224609375, 34.6246452331543, 14.298702239990234, 11.315620422363281, 41.36164093017578, -5.4460296630859375, 9.346988677978516, -15.957328796386719, 85.10359191894531, 29.0267333984375, 17.525001525878906, 80.20649719238281, -10.616592407226562, 25.44683837890625, 10.936731338500977, 2.6720848083496094, -3.705841064453125, 57.79368591308594, 2.646677017211914, 25.89588165283203, 29.509078979492188, 36.625343322753906, -0.43253517150878906, 0.15848541259765625, 26.541351318359375, 61.046085357666016, 46.49272918701172, 9.443702697753906, 4.398468017578125, 83.69059753417969, 45.512451171875, 53.641632080078125, -1.5492324829101562, 44.142662048339844, 12.288101196289062, 45.091957092285156, 43.172515869140625, 93.96205139160156, 15.484054565429688, 53.71897888183594, 102.3050537109375, 18.327238082885742, 48.211143493652344, 43.64622116088867, -0.3391532897949219, -7.449424743652344, 36.099117279052734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000556.npy"} +{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 30.403928756713867, "std": 26.89265251159668, "min": -19.168643951416016, "p10": 3.0156908035278325, "median": 25.22757911682129, "p90": 69.8290298461914, "max": 108.72360229492188, "pos_frac": 0.90625, "sample": [29.208831787109375, 48.58769226074219, 24.137428283691406, 35.22798156738281, -3.763864517211914, 54.30537414550781, 3.703125, 39.69506072998047, 20.991409301757812, 14.630805969238281, 31.465606689453125, 20.992050170898438, 24.69390106201172, 28.480682373046875, 5.59466552734375, 27.36724853515625, 84.31671142578125, 14.366378784179688, 46.4764518737793, 14.70684814453125, 20.616134643554688, 108.72360229492188, 72.36575317382812, 63.30774688720703, 39.68928527832031, 66.71099853515625, 5.293975830078125, 23.24671173095703, -11.838239669799805, 69.21028137207031, 17.744495391845703, 41.62562561035156, 86.9784927368164, 23.691051483154297, 2.7571372985839844, 6.4986572265625, 10.455066680908203, 5.9170684814453125, 34.220428466796875, 11.155017852783203, 8.524543762207031, -0.2368297576904297, 29.209604263305664, 24.222126007080078, 3.6189823150634766, -0.412017822265625, 50.72761154174805, 74.83389282226562, 24.4224853515625, 40.88689422607422, 26.143680572509766, 92.21576690673828, 70.09420776367188, 40.339088439941406, 37.62584686279297, -2.8434600830078125, -19.168643951416016, 26.154136657714844, 11.049270629882812, 25.76125717163086, 3.8543014526367188, 66.11769104003906, 42.59381866455078, 6.5634613037109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000557.npy"} +{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 40.984100341796875, "std": 31.049041748046875, "min": -11.951118469238281, "p10": 10.279118728637696, "median": 32.681020736694336, "p90": 89.30910644531251, "max": 110.35079193115234, "pos_frac": 0.953125, "sample": [26.48309326171875, 6.7919769287109375, 21.539649963378906, 39.179840087890625, 49.846893310546875, 27.507625579833984, 39.68718719482422, 32.366783142089844, 12.77804946899414, -11.951118469238281, 10.700958251953125, 41.28923034667969, 100.65394592285156, 82.26788330078125, 43.211055755615234, 25.64223861694336, 61.027854919433594, 55.076438903808594, 100.06498718261719, 44.201812744140625, 30.881338119506836, 29.969215393066406, 47.67083740234375, 18.945026397705078, 24.06793975830078, 24.02874755859375, 69.04409790039062, 10.503372192382812, 28.57514190673828, 19.190223693847656, 17.444374084472656, 38.0350341796875, 27.500030517578125, 90.62168884277344, 86.24641418457031, 61.56334686279297, 33.1695556640625, 27.440052032470703, 11.329017639160156, 24.004764556884766, 11.54156494140625, 4.024812698364258, 32.99525833129883, 68.28216552734375, 15.646186828613281, -1.5164661407470703, 2.750823974609375, 109.87335205078125, 18.469064712524414, 14.194604873657227, 78.689453125, 100.68074035644531, 66.31121826171875, 67.77743530273438, 83.40478515625, 13.240676879882812, 33.77336502075195, 10.18301010131836, 52.18822479248047, -11.160125732421875, 39.992218017578125, 93.59757995605469, 110.35079193115234, 79.09500122070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000558.npy"} +{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 33.14677429199219, "std": 27.877229690551758, "min": -11.05831527709961, "p10": -2.734062385559081, "median": 27.875383377075195, "p90": 76.70242919921877, "max": 104.95195007324219, "pos_frac": 0.875, "sample": [40.46150207519531, 38.307464599609375, 78.70158386230469, 36.13981628417969, 22.25867462158203, 14.564926147460938, 26.090185165405273, 9.268241882324219, 16.302072525024414, 63.18156433105469, 16.05091094970703, 33.52427673339844, 23.998008728027344, 45.4176025390625, 45.84367370605469, 72.03773498535156, 84.89047241210938, 46.581214904785156, 17.90264892578125, 9.7843017578125, 104.95195007324219, 9.339056015014648, -1.6751155853271484, 49.128997802734375, 10.36029052734375, -3.187896728515625, -8.0650634765625, 29.245208740234375, 11.836627960205078, 40.17008972167969, 13.834259033203125, -11.05831527709961, -10.684928894042969, 20.55776596069336, 34.79069519042969, 66.14169311523438, -3.7284812927246094, 7.519458770751953, 48.871395111083984, 22.008167266845703, 46.36509704589844, -3.701751708984375, 5.461700439453125, 29.67675018310547, 21.553680419921875, 26.505558013916016, 9.50595474243164, 56.61417770385742, 81.60838317871094, -3.834442138671875, 46.66333770751953, 60.95709228515625, 44.66471862792969, 16.608970642089844, 49.25276184082031, 82.99403381347656, 14.574260711669922, 86.52752685546875, 45.163970947265625, 38.63023376464844, 58.33393096923828, 12.500808715820312, 23.682720184326172, 99.4212646484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000559.npy"} +{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 28.90864372253418, "std": 24.986602783203125, "min": -15.746879577636719, "p10": 2.1929201126098636, "median": 21.70637035369873, "p90": 62.6974739074707, "max": 104.5703125, "pos_frac": 0.921875, "sample": [58.72618103027344, 20.550323486328125, 53.09508514404297, 36.30606460571289, 7.677507400512695, 36.6307373046875, -4.0427703857421875, -15.746879577636719, 18.749801635742188, 59.592430114746094, 25.578765869140625, 33.516754150390625, 52.54273986816406, 19.981842041015625, 74.92829895019531, 11.26580810546875, 104.5703125, 74.31087493896484, 18.01250457763672, 15.160835266113281, 10.742660522460938, 16.872413635253906, 41.49916076660156, 77.21817016601562, 62.61683654785156, 21.775108337402344, 11.621135711669922, 22.231918334960938, 29.193695068359375, 21.398101806640625, 52.9918327331543, 10.625541687011719, 14.451873779296875, 42.20458221435547, 54.44920349121094, -14.410148620605469, 18.696334838867188, 90.72933959960938, 22.495758056640625, 65.96050262451172, 19.925308227539062, -4.6580810546875, 26.21987533569336, 5.036956787109375, 62.732032775878906, 16.288040161132812, 1.1301021575927734, 37.01140213012695, 3.8003787994384766, 42.308265686035156, 27.47366714477539, 2.700927734375, 18.947166442871094, 38.137725830078125, -0.4747161865234375, 11.174354553222656, 17.763202667236328, 28.847671508789062, 37.447174072265625, 42.9005126953125, 1.9752025604248047, 5.472221374511719, 21.637632369995117, 11.5849609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000560.npy"} +{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 29.287330627441406, "std": 26.283649444580078, "min": -18.023117065429688, "p10": -0.26044025421142464, "median": 22.366764068603516, "p90": 68.35388946533203, "max": 99.56396484375, "pos_frac": 0.890625, "sample": [63.917686462402344, 21.575592041015625, 11.38726806640625, -10.536104202270508, -2.0639877319335938, 31.593948364257812, 71.20987701416016, 21.887100219726562, 14.414804458618164, 69.87242889404297, 17.642833709716797, 36.590248107910156, 48.3497314453125, 67.83953857421875, 16.546642303466797, 13.940155029296875, 42.44325637817383, 47.37397003173828, 81.18209075927734, 33.48499298095703, 53.83742904663086, 1.5448455810546875, 39.46882629394531, 57.34165954589844, 24.36638641357422, 18.6976318359375, 78.51678466796875, 44.17609405517578, 21.706880569458008, 40.03291320800781, 33.07218933105469, 95.4078369140625, 7.307670593261719, 56.51375198364258, 55.280914306640625, 22.84642791748047, -3.931304931640625, 2.2464599609375, 13.03264045715332, 11.9637451171875, 12.978944778442383, 0.8418292999267578, 4.280479431152344, 44.169036865234375, 6.44427490234375, -4.6251220703125, 16.603788375854492, 99.56396484375, 16.698881149291992, 35.97789001464844, -15.501480102539062, 16.555999755859375, 34.01835250854492, -0.7328414916992188, 33.04049301147461, 8.022659301757812, 18.31493377685547, 33.880088806152344, 34.18083190917969, 23.138519287109375, 68.57432556152344, -18.023117065429688, 21.621307373046875, 12.283210754394531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000561.npy"} +{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 37.77778244018555, "std": 26.89278793334961, "min": -10.914375305175781, "p10": 11.000024795532227, "median": 31.29846954345703, "p90": 75.56397247314453, "max": 104.27523803710938, "pos_frac": 0.96875, "sample": [26.26006317138672, 102.72311401367188, 35.77274703979492, 37.34373474121094, 28.015031814575195, 16.523155212402344, 23.08068084716797, 33.788536071777344, 44.08665466308594, 30.35607147216797, 14.845500946044922, 74.59663391113281, 1.6202945709228516, 66.90348815917969, 71.91777038574219, 11.722915649414062, 33.70928955078125, 62.13871765136719, 7.96479606628418, 38.56317138671875, 18.674957275390625, 35.26056671142578, 52.594573974609375, 20.326818466186523, 10.351287841796875, 63.90290069580078, 104.27523803710938, 21.668548583984375, 92.71736145019531, 12.7921142578125, -0.3878288269042969, 16.84815216064453, 24.844717025756836, 54.454803466796875, 19.475326538085938, 102.24946594238281, 10.78057861328125, 41.93115234375, 51.3638916015625, 51.536895751953125, -10.914375305175781, 31.590621948242188, 72.69153594970703, 62.38972473144531, 11.512065887451172, 49.16350173950195, 25.763572692871094, 81.8402328491211, 31.006317138671875, 75.97854614257812, 17.368976593017578, 48.89320373535156, 45.332908630371094, 17.13327407836914, 9.781639099121094, 42.58137512207031, 16.14449691772461, 16.875539779663086, 19.472017288208008, 19.019657135009766, 18.064685821533203, 36.843109130859375, 87.15748596191406, 24.493972778320312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000562.npy"} +{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 30.286453247070312, "std": 23.748916625976562, "min": -14.263046264648438, "p10": 1.9981216430664075, "median": 28.042617797851562, "p90": 58.233010864257814, "max": 97.16363525390625, "pos_frac": 0.921875, "sample": [10.33302116394043, 27.020095825195312, 7.317840576171875, 17.665298461914062, 47.005863189697266, -12.655563354492188, 76.30087280273438, 20.52227783203125, 22.825416564941406, 45.27464294433594, 15.804405212402344, 38.43141555786133, 12.35335922241211, 57.374664306640625, -0.12212371826171875, 50.886138916015625, 27.909255981445312, 74.12075805664062, 28.175979614257812, 42.33558654785156, 53.267486572265625, -0.5662765502929688, 12.071380615234375, 6.3096466064453125, 0.9320220947265625, -5.865711212158203, 38.28038024902344, 58.409423828125, 54.7802734375, 40.94329833984375, 6.951873779296875, 61.44956970214844, -14.263046264648438, 36.0256233215332, 16.790884017944336, 50.51115417480469, 16.711299896240234, 3.2711868286132812, 46.35157012939453, 36.52632141113281, 67.49828338623047, 48.69268798828125, 57.821380615234375, 37.99201202392578, 85.75653076171875, 43.17840576171875, 20.239952087402344, 97.16363525390625, 21.43695068359375, 35.856964111328125, 6.298969268798828, 32.360511779785156, 17.07160186767578, 11.758108139038086, 31.109329223632812, 10.980117797851562, 31.86974334716797, 1.4525222778320312, 34.769561767578125, 11.396203994750977, 52.94524383544922, 15.18545913696289, 21.768638610839844, 15.962610244750977], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000563.npy"} +{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 32.34123992919922, "std": 22.849916458129883, "min": -15.294261932373047, "p10": 3.799764633178712, "median": 34.44287872314453, "p90": 62.14091453552246, "max": 78.28231811523438, "pos_frac": 0.9375, "sample": [33.74281311035156, 21.402175903320312, 69.60060119628906, 2.6113204956054688, 62.14780044555664, 55.68054962158203, 62.124847412109375, 11.122024536132812, 35.1429443359375, 1.6748199462890625, 36.242950439453125, 31.60706329345703, 36.27971649169922, 41.942169189453125, 60.80027770996094, 9.160675048828125, 33.2357292175293, 15.833625793457031, -15.294261932373047, 14.507797241210938, 43.10481262207031, 5.1782379150390625, 11.671302795410156, -11.300779342651367, 4.6247100830078125, 23.083499908447266, 11.754119873046875, 46.291595458984375, 22.315155029296875, 70.04458618164062, 37.17291259765625, 22.509979248046875, 19.919876098632812, -3.4287338256835938, 42.272911071777344, 38.431480407714844, 49.600616455078125, 48.101219177246094, 43.019683837890625, 3.446216583251953, 54.129547119140625, 38.052947998046875, 13.3992919921875, 10.627738952636719, 36.822235107421875, 10.596435546875, 15.514442443847656, 17.8322696685791, 78.28231811523438, 65.4696044921875, 12.609519958496094, 27.79796600341797, 56.1368408203125, -3.0789222717285156, 56.28315353393555, 15.933006286621094, 51.1096076965332, 76.92648315429688, 50.82176208496094, 36.692840576171875, 64.56988525390625, 55.005226135253906, 22.90078353881836, 58.025291442871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000564.npy"} +{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 35.16046905517578, "std": 30.857439041137695, "min": -15.841583251953125, "p10": 2.890913200378418, "median": 29.055843353271484, "p90": 72.01796112060549, "max": 148.0425567626953, "pos_frac": 0.953125, "sample": [-1.456634521484375, 148.0425567626953, 36.996002197265625, 9.814571380615234, 10.347366333007812, 65.78953552246094, 84.79447174072266, 28.36602020263672, 24.04505157470703, 17.788116455078125, 29.485931396484375, 59.98567199707031, 17.21771240234375, 49.285675048828125, 10.734947204589844, 12.871685028076172, -3.5054492950439453, 29.92705535888672, 14.533477783203125, 56.10789489746094, 28.625755310058594, 37.01294708251953, 55.511573791503906, 88.763916015625, 17.966415405273438, 17.452163696289062, 54.45840072631836, 42.411521911621094, -15.841583251953125, 40.10393524169922, 30.660064697265625, 23.490591049194336, 33.35930252075195, 46.02055358886719, 3.8183822631835938, 3.20916748046875, 62.887779235839844, 61.472084045410156, 4.180580139160156, 24.196517944335938, 19.472732543945312, 46.686492919921875, 21.021255493164062, 61.02940368652344, 46.47908020019531, 129.19125366210938, 8.08210563659668, 24.425613403320312, 80.3512954711914, 8.240753173828125, 23.659744262695312, 1.5721359252929688, 47.07344055175781, 46.64607620239258, 74.68728637695312, 21.746002197265625, 60.05046081542969, 0.7341785430908203, 33.37558364868164, 3.8460216522216797, 2.754518508911133, 2.712919235229492, 33.31695556640625, 92.18280792236328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000565.npy"} +{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 33.91465377807617, "std": 30.285097122192383, "min": -19.3923282623291, "p10": 0.17043647766113373, "median": 31.251373291015625, "p90": 72.97052612304688, "max": 112.04925537109375, "pos_frac": 0.890625, "sample": [1.592529296875, 8.058753967285156, 32.70122528076172, 31.716201782226562, 34.4400634765625, -2.352996826171875, 28.492660522460938, 30.786544799804688, -1.2213191986083984, 72.48995971679688, 39.10618591308594, 18.7735595703125, 5.126922607421875, 38.22142791748047, 47.79290771484375, 57.49523162841797, 50.76878356933594, 1.0672340393066406, -0.8996677398681641, 15.0574951171875, 32.27382278442383, 83.05610656738281, 56.489013671875, 8.4373779296875, 48.089569091796875, -19.3923282623291, 53.075714111328125, 45.31653594970703, 15.694513320922852, 47.013671875, 21.757835388183594, 112.04925537109375, 67.98139190673828, 11.605331420898438, 96.64950561523438, 9.355354309082031, -14.286750793457031, 63.90740966796875, 29.700424194335938, -1.7264251708984375, 10.823753356933594, 60.00971984863281, 3.0768051147460938, 27.898033142089844, 32.95323181152344, 80.07038879394531, 73.11791229248047, 35.89250946044922, 6.9878692626953125, 1.2334327697753906, 7.218557357788086, 111.11724853515625, 17.269882202148438, 72.62662506103516, 4.11631965637207, 23.40314483642578, 59.01792907714844, 34.52549743652344, 90.40155029296875, -0.21390533447265625, 10.714519500732422, 26.39453125, 44.70732116699219, 60.91400146484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000566.npy"} +{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 32.32299041748047, "std": 28.5046329498291, "min": -38.971954345703125, "p10": -0.9530776977539058, "median": 32.892391204833984, "p90": 72.72854919433594, "max": 85.36883544921875, "pos_frac": 0.875, "sample": [5.056243896484375, 40.84173583984375, 54.87506866455078, 69.15830993652344, 13.857376098632812, 38.526641845703125, 11.548408508300781, 26.330970764160156, 63.512184143066406, 43.420738220214844, 50.614295959472656, -4.249977111816406, 66.29872131347656, 32.66712951660156, 76.2315444946289, 47.9208984375, 64.16349792480469, 75.67893981933594, 73.29885864257812, 5.663646697998047, 60.68315124511719, 51.46897888183594, -16.345823287963867, 65.20820617675781, 48.019622802734375, 33.117652893066406, 21.482074737548828, 29.60479736328125, 6.967443466186523, 45.20686340332031, 30.338388442993164, 60.19920349121094, 12.902427673339844, -2.9912033081054688, 38.08071517944336, 2.6700439453125, 33.72812271118164, 82.16459655761719, 85.36883544921875, 79.91029357910156, 20.093154907226562, -38.971954345703125, 18.963607788085938, 4.4123077392578125, 22.852561950683594, 43.048709869384766, 10.7891845703125, 40.26860046386719, 11.970169067382812, -10.011877059936523, -12.9305419921875, 74.62667846679688, 0.8571090698242188, 71.3978271484375, 4.75433349609375, 33.48896026611328, -1.1356201171875, 23.322280883789062, -0.5271453857421875, 34.151546478271484, 51.921470642089844, 8.768863677978516, 1.5780982971191406, 31.783401489257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000567.npy"} +{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 37.909576416015625, "std": 27.431215286254883, "min": -16.814102172851562, "p10": 2.084158897399906, "median": 37.270748138427734, "p90": 76.54836807250979, "max": 113.576904296875, "pos_frac": 0.90625, "sample": [37.84190368652344, 29.217655181884766, 85.31636810302734, 21.83316421508789, 22.682579040527344, 71.18658447265625, 57.99766540527344, 31.010154724121094, 36.76548767089844, -16.814102172851562, 9.200668334960938, 0.6134357452392578, 81.41046142578125, 83.9456787109375, -1.0751419067382812, 60.85934829711914, 30.06024932861328, 7.362712860107422, 11.727985382080078, 40.86277770996094, 32.432167053222656, 34.839359283447266, 88.02679443359375, 37.77600860595703, 43.41477966308594, 57.38532257080078, 12.215858459472656, 59.72673034667969, 9.684593200683594, 17.11772918701172, 38.716064453125, 61.76588439941406, 48.63213348388672, 59.23768615722656, 57.95121765136719, 34.114707946777344, 43.53224182128906, 30.345985412597656, 39.25969314575195, 78.84627532958984, 113.576904296875, -4.146781921386719, 29.864398956298828, 46.429203033447266, 31.663040161132812, 29.876129150390625, -6.477481842041016, 14.80270767211914, 13.611095428466797, 102.47779083251953, 26.142946243286133, 60.543006896972656, 5.515846252441406, 46.84766387939453, 41.65803527832031, 52.126190185546875, -13.881248474121094, 39.42771911621094, 45.56782150268555, 51.236114501953125, 28.020418167114258, 60.22119140625, -0.8125686645507812, 24.89589500427246], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000568.npy"} +{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 30.12950897216797, "std": 27.329198837280273, "min": -19.886856079101562, "p10": -1.7657257080078113, "median": 26.042662620544434, "p90": 71.48460159301759, "max": 102.46028137207031, "pos_frac": 0.859375, "sample": [5.565757751464844, 15.868202209472656, 102.46028137207031, 38.70249938964844, 22.782562255859375, -0.598724365234375, 8.981918334960938, 72.91715240478516, 2.0687084197998047, 0.4139556884765625, 34.709747314453125, 96.97822570800781, 28.457059860229492, 26.61583709716797, 43.3585090637207, -9.919235229492188, 29.71387481689453, 68.14198303222656, 18.011661529541016, 16.1675968170166, -19.886856079101562, 39.05029296875, 22.870784759521484, 16.093570709228516, 13.154897689819336, 56.35490417480469, 89.67790222167969, 11.272872924804688, -9.090301513671875, -2.3919830322265625, 10.270345687866211, 80.04901123046875, 46.02326202392578, 12.615667343139648, 29.247535705566406, 53.11644744873047, -2.91790771484375, 45.31118392944336, 61.45027160644531, -0.35400390625, 75.19021606445312, 7.097663879394531, -7.696542739868164, 52.46397399902344, 16.66571044921875, 25.4694881439209, 41.77040100097656, 36.8797607421875, 11.474533081054688, 16.09320068359375, 19.41962432861328, 73.60238647460938, 16.24506378173828, 44.07482147216797, 37.618019104003906, -2.265869140625, 12.463264465332031, 41.79172897338867, 51.53117370605469, 31.70184326171875, 27.182510375976562, 23.833274841308594, 65.30543518066406, 37.06158447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000569.npy"} +{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 35.00107955932617, "std": 25.37564468383789, "min": -14.023933410644531, "p10": 4.037163543701173, "median": 30.60845375061035, "p90": 69.01341857910157, "max": 107.7603759765625, "pos_frac": 0.90625, "sample": [4.510215759277344, 59.848548889160156, 44.26353073120117, 69.76624298095703, 20.904197692871094, 37.732303619384766, 56.03830337524414, 55.645355224609375, 85.51156616210938, 67.25682830810547, 46.235107421875, 100.43549346923828, 74.76380157470703, 29.087173461914062, 16.644243240356445, 20.623294830322266, 107.7603759765625, 29.42115592956543, 61.81293487548828, 11.822731018066406, 3.8344268798828125, -7.781227111816406, 35.45221710205078, 20.654342651367188, 7.62640380859375, 60.21278381347656, 30.382129669189453, 30.83477783203125, -14.023933410644531, 37.12098693847656, 31.43022918701172, 37.50306701660156, 20.494300842285156, 21.921632766723633, 46.37518310546875, 71.971923828125, -0.4190101623535156, 65.30500793457031, 8.271598815917969, 40.37110900878906, 17.772485733032227, 29.431507110595703, -0.012420654296875, 27.450599670410156, 53.744110107421875, 26.85224151611328, 33.11650848388672, 46.024070739746094, 22.12095832824707, 60.9608154296875, -0.3234138488769531, 22.78466796875, 70.3511734008789, 42.61961364746094, 34.01251220703125, 26.683273315429688, 15.231620788574219, 26.2535400390625, 15.849472045898438, -3.301074981689453, 12.70480728149414, 48.8546142578125, 23.866100311279297, 39.39987564086914], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000570.npy"} +{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 34.58122634887695, "std": 27.801191329956055, "min": -13.674713134765625, "p10": 3.8029483795166046, "median": 29.269794464111328, "p90": 71.80464401245118, "max": 105.92010498046875, "pos_frac": 0.90625, "sample": [53.71867370605469, 38.31494903564453, 24.542186737060547, 15.65155029296875, -5.569719314575195, 43.87464904785156, 8.578075408935547, 39.56095504760742, 36.88017272949219, 46.6158447265625, 100.42495727539062, 22.238628387451172, 52.883583068847656, 11.747550964355469, 26.638774871826172, 20.498048782348633, 66.92218017578125, 105.92010498046875, 20.113128662109375, 51.76775360107422, 56.06555938720703, 100.87132263183594, 43.94084167480469, -2.554962158203125, 6.805091857910156, 30.557844161987305, 42.45030975341797, 43.537254333496094, 9.501375198364258, 37.28715515136719, 25.829702377319336, 13.438323974609375, 85.12600708007812, 70.20263671875, 95.72666931152344, 18.22745132446289, 14.347503662109375, 28.654327392578125, 61.21580123901367, 66.05619049072266, 40.0944938659668, 15.351062774658203, 72.49121856689453, 24.438194274902344, 2.516315460205078, 38.64873504638672, 77.38127136230469, 48.132415771484375, -2.8586463928222656, 25.64197540283203, 10.796939849853516, -3.184783935546875, 42.05565643310547, 16.45850372314453, 11.62939453125, 29.88526153564453, 21.527423858642578, 7.330753326416016, -13.674713134765625, 37.59329605102539, 12.846630096435547, 54.776634216308594, 21.921920776367188, -7.2098388671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000571.npy"} +{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 32.25321960449219, "std": 31.98067283630371, "min": -42.118682861328125, "p10": -3.7617568969726545, "median": 29.165355682373047, "p90": 75.708935546875, "max": 122.31629943847656, "pos_frac": 0.84375, "sample": [1.7519302368164062, -10.956329345703125, 18.346221923828125, 50.95433044433594, 76.92770385742188, 35.16020965576172, -4.6256256103515625, 8.286155700683594, -7.094387054443359, 6.063240051269531, 10.951292037963867, 3.0989227294921875, 53.72795867919922, 13.041667938232422, 72.49177551269531, 49.30182647705078, 38.962921142578125, 83.33039093017578, 12.654678344726562, 51.88314437866211, 3.8181838989257812, 27.30145263671875, 41.48974609375, 54.83966827392578, 16.80046844482422, -1.1311397552490234, -6.713161468505859, 9.335746765136719, -10.9730224609375, 58.20817565917969, 44.22862243652344, 16.640296936035156, 51.897884368896484, 21.423202514648438, 4.319377899169922, 122.31629943847656, 52.69715118408203, 28.92089080810547, 48.484039306640625, 39.44709777832031, 66.0565185546875, 26.63994598388672, -42.118682861328125, 58.03620147705078, -0.74505615234375, -41.125762939453125, 91.07815551757812, 24.242576599121094, 26.612394332885742, -1.746063232421875, 75.93055725097656, 14.350914001464844, 20.91588592529297, 53.35247039794922, 16.992511749267578, 33.28792190551758, 75.19181823730469, 91.34498596191406, 34.1065673828125, 38.520233154296875, 37.937294006347656, 79.60734558105469, 68.71839904785156, 29.409820556640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000572.npy"} +{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 34.536949157714844, "std": 28.615467071533203, "min": -20.89297103881836, "p10": 2.5046955108642592, "median": 29.955368041992188, "p90": 73.86744689941406, "max": 107.09217071533203, "pos_frac": 0.921875, "sample": [53.073509216308594, 14.00285530090332, 33.99073028564453, 19.233062744140625, 27.077674865722656, 54.04823303222656, 13.376174926757812, 25.998146057128906, 72.70304107666016, 64.93045043945312, 15.609420776367188, 44.80467987060547, 0.6434249877929688, 107.09217071533203, 45.014739990234375, 84.39158630371094, 27.334693908691406, -20.89297103881836, 7.161041259765625, 15.068328857421875, 37.920494079589844, 57.64421844482422, 11.949851989746094, 61.4671630859375, -6.830513000488281, 75.80953979492188, 93.88723754882812, 72.64151000976562, 31.487041473388672, 52.848876953125, -11.810684204101562, 12.33087158203125, 9.112201690673828, 45.7841796875, 31.776023864746094, 11.055122375488281, 53.850074768066406, -9.861648559570312, 42.83153533935547, -5.785957336425781, 91.36219787597656, 20.883934020996094, 60.803955078125, 3.968341827392578, 5.3654632568359375, 8.214475631713867, 40.408905029296875, 89.56549072265625, 29.970443725585938, 20.970314025878906, 46.33074951171875, 28.69597625732422, 3.9993896484375, 19.250869750976562, 25.971206665039062, 1.8774185180664062, 52.475738525390625, 74.3664779663086, 65.75013732910156, 29.940292358398438, 28.28973388671875, 34.09251403808594, 12.597221374511719, 42.44544219970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000573.npy"} +{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 34.14720916748047, "std": 28.386531829833984, "min": -31.84403419494629, "p10": 5.22524528503418, "median": 27.535313606262207, "p90": 77.24646072387696, "max": 92.2944107055664, "pos_frac": 0.953125, "sample": [12.281944274902344, 47.953033447265625, 12.498611450195312, 23.508033752441406, 18.256134033203125, 11.301193237304688, -31.84403419494629, 75.58221435546875, 38.54505157470703, 54.36053466796875, 39.45874786376953, 57.44956970214844, 69.03972625732422, 61.719383239746094, 33.11177062988281, 16.786026000976562, 10.448053359985352, 22.963943481445312, 19.572601318359375, 91.05045318603516, 5.4846343994140625, 7.6846923828125, 13.15496826171875, 44.7767333984375, 22.13885498046875, 68.73348236083984, 34.93993377685547, 1.30950927734375, 14.616024017333984, 16.107799530029297, 36.14459228515625, 8.572032928466797, 37.92041015625, 92.2944107055664, 5.598182678222656, 21.900480270385742, -23.418128967285156, 69.218505859375, 23.815628051757812, 29.238487243652344, 89.50398254394531, 10.590972900390625, 83.14996337890625, 51.109275817871094, -6.15960693359375, 13.51727294921875, 3.9213485717773438, 23.747180938720703, 36.69096374511719, 23.55670166015625, 44.97303009033203, 5.114078521728516, 1.6260242462158203, 85.31735229492188, 45.48577880859375, 32.02935028076172, 43.811798095703125, 87.66873168945312, 41.89586639404297, 59.10459899902344, 77.95970916748047, 67.8709487915039, 22.82963752746582, 25.83213996887207], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000574.npy"} +{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 36.32011795043945, "std": 31.866594314575195, "min": -36.50041198730469, "p10": -3.2526168823242183, "median": 32.09042930603027, "p90": 76.4083480834961, "max": 106.14752197265625, "pos_frac": 0.828125, "sample": [18.72315216064453, 51.72999572753906, 91.1700210571289, -3.4823455810546875, 57.0179443359375, 29.291961669921875, 51.31146240234375, 68.2662124633789, 39.066139221191406, 35.505577087402344, 23.453344345092773, 29.6309814453125, -10.351409912109375, 29.350021362304688, 47.24787902832031, 64.6013412475586, 14.984039306640625, 84.34615325927734, 32.42154312133789, 60.84223937988281, 52.27214050292969, 22.126678466796875, 75.26551818847656, 14.63775634765625, 26.68351173400879, -9.641242980957031, -15.997146606445312, 49.311519622802734, 13.045158386230469, 31.00405502319336, 24.483930587768555, 81.15792846679688, 33.41722869873047, 68.97946166992188, -2.716583251953125, -2.606964111328125, 69.07010650634766, -1.7978076934814453, 106.14752197265625, 43.73773193359375, 31.759315490722656, 3.6676864624023438, -27.05779266357422, 57.26873779296875, 84.97976684570312, -36.50041198730469, 61.919090270996094, 57.93175506591797, 31.328414916992188, 11.2294921875, 7.1225433349609375, 101.56460571289062, 57.901641845703125, 22.528533935546875, 73.2019271850586, 29.466100692749023, 63.38935852050781, 22.602821350097656, 76.89813232421875, -0.6434516906738281, -18.17017364501953, 37.576316833496094, 25.58861541748047, 55.227806091308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000575.npy"} +{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 34.768131256103516, "std": 30.357736587524414, "min": -30.184276580810547, "p10": 6.310462188720703, "median": 27.14690399169922, "p90": 80.69935455322268, "max": 121.17652130126953, "pos_frac": 0.953125, "sample": [51.260780334472656, 92.17681884765625, -30.184276580810547, 21.310161590576172, 0.8250656127929688, 6.435634613037109, 19.497520446777344, 54.0133056640625, 34.111148834228516, 22.102386474609375, 37.71288299560547, 88.82255554199219, 19.04845428466797, 26.439468383789062, 52.8826904296875, 40.2076416015625, 69.2947998046875, 17.534317016601562, 74.43633270263672, 9.280471801757812, 37.464202880859375, 43.15277099609375, 58.98612976074219, 93.11802673339844, 14.011524200439453, 94.60043334960938, 12.462520599365234, 9.999156951904297, 61.12139892578125, 56.098785400390625, -21.692115783691406, 27.854339599609375, 30.518585205078125, 6.256816864013672, 46.236541748046875, 83.38350677490234, 121.17652130126953, 15.532196044921875, 11.656583786010742, 51.652732849121094, 9.333541870117188, 35.53227996826172, 29.50529670715332, 36.43742370605469, 11.77392578125, 0.0980987548828125, 9.003082275390625, 24.13927459716797, -2.530059814453125, 20.29475975036621, 16.377342224121094, 99.35942077636719, 65.81587219238281, 65.6484375, 7.666751861572266, 1.7664852142333984, 23.748313903808594, 19.574920654296875, 33.42936706542969, 16.360626220703125, 37.22698211669922, 21.502639770507812, 14.439285278320312, 67.85749053955078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000576.npy"} +{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 38.08292007446289, "std": 28.71381950378418, "min": -25.922870635986328, "p10": 5.749527168273927, "median": 36.8105411529541, "p90": 72.37517013549805, "max": 109.6441650390625, "pos_frac": 0.90625, "sample": [7.666839599609375, 109.28489685058594, 38.42860412597656, 85.91873931884766, 9.286764144897461, 37.59251403808594, 36.29738235473633, 43.38507843017578, 39.86759948730469, 14.24322509765625, 69.3262939453125, 23.0321044921875, 20.719173431396484, 60.06378173828125, 32.99236297607422, 66.66358184814453, 109.6441650390625, 89.72285461425781, 37.323699951171875, 25.77279281616211, 17.48367691040039, 32.151817321777344, 8.550987243652344, 20.455902099609375, 70.74871826171875, 21.190784454345703, 12.17502212524414, 60.91028594970703, 71.11196899414062, 34.047813415527344, 51.363861083984375, 5.3868255615234375, 27.280202865600586, 38.12623596191406, 64.77940368652344, 37.43603515625, 72.91654205322266, 29.52768325805664, 30.53734588623047, -1.400360107421875, 6.595830917358398, 7.405364990234375, 41.29264831542969, 25.909774780273438, -2.411346435546875, -4.077579498291016, -2.4596939086914062, 48.50053024291992, 14.707130432128906, 99.79006958007812, 40.480735778808594, 45.34208679199219, 46.199615478515625, 15.790632247924805, 79.39199829101562, 70.64299011230469, 57.82637405395508, -0.4207611083984375, -25.922870635986328, 35.027889251708984, 42.104774475097656, 20.765907287597656, 51.87666320800781, 60.934783935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000577.npy"} +{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 35.280113220214844, "std": 27.42318344116211, "min": -13.554534912109375, "p10": 3.180938148498536, "median": 28.675315856933594, "p90": 79.34371719360352, "max": 95.07947540283203, "pos_frac": 0.921875, "sample": [67.2209701538086, 44.40078353881836, 38.894561767578125, 8.521923065185547, 57.494224548339844, 40.68440628051758, 88.086669921875, 15.2867431640625, 14.242542266845703, -11.32577133178711, 4.043787002563477, 78.9838638305664, 48.310028076171875, 25.447349548339844, 27.669219970703125, 22.99980926513672, 47.239227294921875, 10.509071350097656, 55.988037109375, 66.310791015625, 26.55780029296875, 45.40327453613281, 15.785463333129883, 8.802452087402344, -4.270771026611328, 2.811145782470703, 50.61630630493164, 22.077028274536133, 34.13687515258789, 19.97014617919922, 29.681411743164062, 14.446578979492188, 32.809349060058594, 86.62873840332031, 16.288070678710938, 84.10977172851562, 58.213539123535156, 6.165679931640625, -1.2089157104492188, 13.72909164428711, 39.73204803466797, 43.11566925048828, 53.45431900024414, 11.325328826904297, 77.6688461303711, 27.238677978515625, 13.529335021972656, 2.5403594970703125, 32.14317321777344, 24.072378158569336, 40.74182891845703, 84.82832336425781, 19.732643127441406, 85.23329162597656, 65.72550964355469, 18.489871978759766, -2.058441162109375, 79.49794006347656, 95.07947540283203, 57.1417236328125, 36.32989501953125, -13.554534912109375, 26.200714111328125, 25.957550048828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000578.npy"} +{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 30.629066467285156, "std": 23.335224151611328, "min": -9.793693542480469, "p10": 2.567663574218751, "median": 26.81344985961914, "p90": 63.737873077392585, "max": 94.16844940185547, "pos_frac": 0.953125, "sample": [1.0652923583984375, 39.173095703125, 19.500160217285156, 47.677345275878906, 6.7056884765625, -0.97918701171875, 33.553199768066406, 57.915069580078125, 36.40251541137695, 64.30864715576172, 4.992195129394531, 24.842021942138672, 9.389228820800781, 43.511810302734375, 69.80670166015625, 36.5333251953125, 15.600326538085938, 60.06966018676758, 94.16844940185547, -8.633148193359375, -9.793693542480469, 22.557594299316406, 22.840492248535156, 15.440498352050781, 32.77796173095703, 40.048057556152344, 31.295440673828125, 7.767255783081055, 31.726009368896484, 38.449607849121094, 43.82609558105469, 7.0017242431640625, 19.0853271484375, 79.68719482421875, 67.51653289794922, 50.43206787109375, 47.84846115112305, 3.2503814697265625, 14.177841186523438, 35.96259307861328, 22.064788818359375, 17.30721664428711, 54.13520050048828, 0.0911407470703125, 81.03179168701172, 14.537252426147461, 42.76948165893555, 18.815322875976562, 2.2750701904296875, 44.33643341064453, 32.82794952392578, 22.57024383544922, 28.081932067871094, 24.467453002929688, 23.056365966796875, 7.4793701171875, 17.29058074951172, 33.087867736816406, 1.7942390441894531, 42.811798095703125, 25.544967651367188, 76.83597564697266, 7.141931533813477, 62.40606689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000579.npy"} +{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 40.519554138183594, "std": 30.12722396850586, "min": -17.143766403198242, "p10": 3.5681438446044975, "median": 41.28202819824219, "p90": 67.65886688232422, "max": 145.67376708984375, "pos_frac": 0.9375, "sample": [26.308273315429688, 67.28091430664062, 20.781448364257812, 28.884498596191406, 28.561573028564453, 41.579734802246094, 63.20359802246094, 117.3607406616211, 14.771480560302734, 67.82084655761719, 47.12157440185547, 14.360763549804688, 145.67376708984375, 16.85724639892578, 10.928226470947266, 67.25436401367188, 47.139137268066406, 1.3014183044433594, 80.09905242919922, 57.337364196777344, 29.104860305786133, 60.7650146484375, 38.945152282714844, 45.57301712036133, 26.347583770751953, 48.06822967529297, 37.257720947265625, 59.44253158569336, 54.68095397949219, 9.933189392089844, 42.65547180175781, 61.61530685424805, 16.87537384033203, 0.7694454193115234, 91.43052673339844, 66.07443237304688, 21.94426727294922, -17.143766403198242, 55.80638885498047, 59.346519470214844, 50.689117431640625, 49.12237548828125, 27.549243927001953, 66.18511962890625, 10.730865478515625, 42.05836486816406, -15.445823669433594, -10.16343879699707, 16.770477294921875, 78.07460021972656, 62.193023681640625, 0.7975387573242188, 54.18293762207031, 40.98432159423828, 25.967147827148438, 17.451936721801758, 8.857170104980469, 40.606689453125, -1.302001953125, 17.492515563964844, 34.38508605957031, 91.29188537597656, 54.38726043701172, 56.266876220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000580.npy"} +{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 31.124555587768555, "std": 24.851299285888672, "min": -12.298234939575195, "p10": 3.7582130432128906, "median": 26.301854133605957, "p90": 63.35090560913087, "max": 81.91606903076172, "pos_frac": 0.921875, "sample": [61.1529541015625, 15.904541015625, 34.85566711425781, 14.301368713378906, 45.456817626953125, 26.72713279724121, -7.756866455078125, 25.5740966796875, 1.0033950805664062, 42.717098236083984, 19.429641723632812, 50.56804656982422, 57.634918212890625, 64.28437042236328, 42.92445373535156, -7.1319122314453125, 78.55279541015625, 16.668479919433594, 81.86958312988281, 81.29298400878906, 4.696739196777344, 61.172821044921875, 10.809043884277344, 13.3367919921875, 16.244422912597656, 33.366790771484375, 40.8398551940918, 6.9217071533203125, 32.62434387207031, 23.80748748779297, -12.298234939575195, 76.41474914550781, 25.876575469970703, 30.253814697265625, 48.615875244140625, -11.848670959472656, 60.91899108886719, 59.075096130371094, -6.0829925537109375, 52.3740234375, 41.50480651855469, 29.554954528808594, 25.55693817138672, 38.397979736328125, 49.64430236816406, 13.829246520996094, 22.410720825195312, 59.28932189941406, 73.28923034667969, 5.515716552734375, 3.8405380249023438, 12.08404541015625, 15.493597030639648, 5.3148345947265625, 48.541282653808594, 24.01641082763672, 11.186405181884766, 10.11181640625, 3.722930908203125, 28.833518981933594, 19.63364601135254, 81.91606903076172, 22.188232421875, 32.946266174316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000581.npy"} +{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 35.69976043701172, "std": 29.193857192993164, "min": -15.417526245117188, "p10": -0.13444519042968744, "median": 30.738082885742188, "p90": 77.87541580200195, "max": 106.04348754882812, "pos_frac": 0.875, "sample": [-15.417526245117188, 39.29399871826172, 60.57240295410156, 62.317142486572266, 77.86273193359375, -1.394195556640625, 54.97595977783203, 41.785797119140625, 31.324508666992188, 39.09074783325195, 12.730936050415039, 47.82637023925781, 11.6439208984375, 68.87773132324219, 16.433326721191406, 73.39147186279297, 12.45695686340332, 21.308120727539062, 51.494834899902344, 80.30536651611328, 30.802536010742188, 13.589790344238281, 68.91756439208984, 18.396347045898438, 20.675506591796875, 98.90101623535156, 23.945083618164062, 32.662498474121094, 29.748367309570312, 50.54241943359375, 30.673629760742188, 77.88085174560547, 49.70170593261719, 15.593711853027344, -5.0238037109375, 36.929935455322266, 10.763740539550781, -0.5755081176757812, 29.864593505859375, 56.65554428100586, 9.694808959960938, 17.80522918701172, 106.04348754882812, 77.88252258300781, 0.484893798828125, 62.984039306640625, -6.557960510253906, -0.08222770690917969, 11.72580337524414, 36.15778350830078, 3.5365028381347656, 81.59649658203125, 88.80642700195312, 27.289443969726562, -3.584260940551758, 0.9269447326660156, 59.18406677246094, 13.725513458251953, 32.177947998046875, 25.099349975585938, 70.99969482421875, -0.15682411193847656, 25.440994262695312, 66.07781219482422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000582.npy"} +{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 29.54582977294922, "std": 29.761417388916016, "min": -27.016159057617188, "p10": -1.468618011474609, "median": 24.281688690185547, "p90": 66.2842010498047, "max": 161.9598846435547, "pos_frac": 0.875, "sample": [28.480804443359375, 63.511199951171875, 36.87066650390625, 57.855377197265625, -10.852022171020508, 5.8112030029296875, 45.325714111328125, 41.7979621887207, 161.9598846435547, 20.851150512695312, 14.54840087890625, 20.28139305114746, 23.58354949951172, 35.814693450927734, 24.979827880859375, -1.6269607543945312, 73.81275939941406, 12.89727783203125, 54.35070037841797, -9.189292907714844, 18.578712463378906, 15.214956283569336, 28.85555648803711, -27.016159057617188, 9.815408706665039, 2.3639984130859375, 37.92140197753906, 3.5219039916992188, -22.861305236816406, 30.580745697021484, 17.38991928100586, 23.505340576171875, 45.190895080566406, 67.30960083007812, 40.411720275878906, 70.02027893066406, 33.607444763183594, 0.5143966674804688, 63.8916015625, 17.19793701171875, -3.0030517578125, 51.8599739074707, 38.04918670654297, -1.099151611328125, 59.4991455078125, 13.906356811523438, 71.34384155273438, 14.066299438476562, -5.370929718017578, 3.9783401489257812, 57.67316436767578, 26.412147521972656, 5.0936431884765625, 47.325416564941406, 20.42376708984375, 7.2555389404296875, 26.489978790283203, 14.386762619018555, 33.3310546875, 74.16085815429688, 76.41008758544922, 44.93196105957031, 18.272369384765625, 18.45770263671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000583.npy"} +{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 39.369117736816406, "std": 26.277454376220703, "min": -9.205215454101562, "p10": 8.617363739013673, "median": 34.00349426269531, "p90": 77.99864959716797, "max": 95.33646392822266, "pos_frac": 0.96875, "sample": [13.054931640625, 51.61073303222656, 20.577377319335938, 27.40282440185547, 5.690277099609375, 33.52699279785156, 45.42463684082031, 76.29351043701172, 39.892120361328125, 13.003490447998047, 11.769989013671875, 20.96080780029297, 15.019317626953125, 73.9131088256836, 57.397705078125, 80.18995666503906, 22.822933197021484, 95.33646392822266, 91.8830795288086, 0.5787887573242188, 57.39892578125, 8.594459533691406, 15.726844787597656, 31.552040100097656, 5.974763870239258, -9.205215454101562, 93.3873291015625, 33.93141174316406, 37.523193359375, 59.11524200439453, 34.00616455078125, 26.98614501953125, 17.019004821777344, -5.689460754394531, 49.25773620605469, 70.79029846191406, 34.000823974609375, 65.2655258178711, 0.7107391357421875, 26.843841552734375, 58.341636657714844, 37.86872863769531, 22.58587646484375, 89.28167724609375, 19.452213287353516, 44.34600067138672, 78.72942352294922, 47.025169372558594, 22.328842163085938, 26.060409545898438, 41.258766174316406, 52.11112976074219, 66.35164642333984, 65.92791748046875, 40.223777770996094, 74.25635528564453, 29.145750045776367, 8.670806884765625, 39.51100158691406, 51.645301818847656, 30.00871467590332, 33.835723876953125, 79.80787658691406, 11.310104370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000584.npy"} +{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 33.05250549316406, "std": 28.828380584716797, "min": -26.50246810913086, "p10": -0.8342622756957998, "median": 30.383756637573242, "p90": 72.69863662719729, "max": 110.55718231201172, "pos_frac": 0.890625, "sample": [29.395706176757812, 19.138832092285156, 74.86255645751953, 49.23896789550781, 0.8308639526367188, 80.91217041015625, 38.31657791137695, -2.3613815307617188, 38.09486389160156, 31.884613037109375, 56.553062438964844, 27.902801513671875, 5.97564697265625, 34.34392547607422, 44.835052490234375, 56.74528884887695, 67.64949035644531, 24.333457946777344, 106.6443862915039, 60.071266174316406, 50.6572380065918, 110.55718231201172, 11.417991638183594, 42.9365234375, 9.250160217285156, 33.90076446533203, 31.532228469848633, 34.00971984863281, 95.06317901611328, 25.878753662109375, 1.0285186767578125, 20.362762451171875, 11.619560241699219, -10.3612060546875, 34.56511688232422, 35.86144256591797, 46.36305236816406, -26.50246810913086, 13.848731994628906, 83.16921997070312, 28.902786254882812, 65.32435607910156, 38.74568176269531, 0.679107666015625, 39.57086181640625, 26.203720092773438, 20.01618194580078, 2.5937957763671875, 19.614349365234375, 29.895322799682617, 0.06778907775878906, -7.319873809814453, 15.88137435913086, -2.896148681640625, 3.151611328125, 84.91143798828125, 57.63140869140625, 30.61959457397461, 61.57288360595703, -1.33392333984375, -1.220855712890625, 26.17485809326172, 30.147918701171875, 45.89958190917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000585.npy"} +{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 32.81856155395508, "std": 29.983264923095703, "min": -29.324594497680664, "p10": 0.5303192138671879, "median": 28.02629852294922, "p90": 79.1125915527344, "max": 98.31903839111328, "pos_frac": 0.90625, "sample": [98.31903839111328, 28.803855895996094, 13.45071029663086, 40.84418487548828, 39.680824279785156, 25.220869064331055, 5.5749969482421875, 24.300987243652344, 37.53083801269531, 62.60773468017578, 48.975914001464844, -2.5385208129882812, 31.857070922851562, 86.50299072265625, -7.941583633422852, 22.301532745361328, 30.27655792236328, -24.846357345581055, 31.400650024414062, 12.142675399780273, 14.497163772583008, 15.779748916625977, 42.60021209716797, 68.05400085449219, 65.41996765136719, -22.871170043945312, 44.373634338378906, 17.33307647705078, 8.924400329589844, 20.598907470703125, 67.41313171386719, 27.011497497558594, 46.03749084472656, 15.913719177246094, 15.027006149291992, 18.875396728515625, 45.518280029296875, -3.4667434692382812, 4.852455139160156, 40.46446228027344, 92.92599487304688, 11.1746826171875, 83.99687194824219, 41.89691162109375, 0.9389266967773438, -29.324594497680664, 27.248741149902344, 42.36798095703125, 81.62953186035156, 15.268730163574219, 0.35520172119140625, 10.436424255371094, 10.467002868652344, 96.48014831542969, 83.20854949951172, 31.888519287109375, 73.23973083496094, 71.64840698242188, 5.432655334472656, 10.646827697753906, 42.39500427246094, 73.092529296875, 27.0009765625, 39.15058898925781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000586.npy"} +{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 30.581205368041992, "std": 27.596582412719727, "min": -15.604284286499023, "p10": -0.7595949172973632, "median": 25.80141258239746, "p90": 68.98250579833986, "max": 99.910400390625, "pos_frac": 0.875, "sample": [38.50349044799805, 62.854522705078125, 25.200889587402344, -2.881786346435547, 59.22441864013672, 1.1770668029785156, 15.78924560546875, 3.8502159118652344, 1.6703853607177734, 36.593177795410156, 9.74453353881836, 44.250160217285156, 31.4708251953125, 4.373292922973633, 32.19746780395508, 42.97876739501953, 1.3969001770019531, 34.78291320800781, 18.323043823242188, 15.104938507080078, 1.7452774047851562, 22.288909912109375, 56.50956726074219, 8.735553741455078, 49.70497131347656, 99.910400390625, -1.0286197662353516, 81.81370544433594, 35.97405242919922, 10.549201965332031, 13.006265640258789, 31.715797424316406, 0.6088485717773438, 24.320877075195312, 64.45268249511719, 26.401935577392578, 6.846595764160156, 12.438941955566406, 7.7163848876953125, -2.4179458618164062, -0.6443386077880859, 57.680511474609375, -15.604284286499023, 77.5252685546875, 20.710481643676758, 76.79899597167969, 1.9376029968261719, -2.820995330810547, 46.87062072753906, 54.375457763671875, 70.92385864257812, 88.78900146484375, 51.886451721191406, 55.19872283935547, 60.076934814453125, 41.666748046875, 82.43949890136719, 21.46044921875, 29.204254150390625, -1.2178211212158203, -0.808990478515625, 11.261207580566406, 51.96764373779297, 49.62190246582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000587.npy"} +{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 32.04176330566406, "std": 28.54863929748535, "min": -15.686531066894531, "p10": -0.9694806098937987, "median": 27.937307357788086, "p90": 80.6527755737305, "max": 103.32987976074219, "pos_frac": 0.875, "sample": [31.195886611938477, 19.868560791015625, 10.7421875, 20.62070083618164, 41.446990966796875, 58.550872802734375, 37.84598159790039, 23.435422897338867, 33.906402587890625, 56.93330383300781, -15.686531066894531, 21.196510314941406, 8.196125030517578, 6.796113967895508, -5.178886413574219, 36.45833206176758, 6.3531036376953125, 51.55815887451172, -1.1775894165039062, 24.263320922851562, 34.79705810546875, 26.772201538085938, -13.926856994628906, 51.37837219238281, 2.0860443115234375, 15.40423583984375, 86.14175415039062, 21.850337982177734, 14.255157470703125, 14.633827209472656, 36.76939010620117, 3.020803451538086, 12.545364379882812, 84.73355102539062, 53.16007995605469, 6.83326530456543, 67.67726135253906, 14.689216613769531, 17.86246109008789, 67.63256072998047, 83.58330535888672, 34.241939544677734, 12.337631225585938, 35.01495361328125, 55.3040771484375, -7.837059020996094, 16.14508819580078, 85.20172119140625, -5.135009765625, 64.57546997070312, -1.0401973724365234, 84.01055908203125, 85.81671142578125, 38.384613037109375, 73.81487274169922, 41.55164337158203, 29.47727394104004, 15.717174530029297, -0.8044748306274414, 55.443809509277344, 35.325355529785156, 1.4702682495117188, 103.32987976074219, 29.102413177490234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000588.npy"} +{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 44.363983154296875, "std": 33.08956527709961, "min": -21.234203338623047, "p10": 3.367512512207033, "median": 42.10517883300781, "p90": 94.13206253051759, "max": 133.8654022216797, "pos_frac": 0.921875, "sample": [71.2660140991211, 56.591514587402344, 41.76243591308594, -2.396343231201172, 38.041473388671875, -3.5407180786132812, 11.195423126220703, 94.60340118408203, 43.00630187988281, 21.468666076660156, 31.603912353515625, 22.879135131835938, 49.277687072753906, 10.576286315917969, 18.382408142089844, 116.78683471679688, 57.292327880859375, -7.921600341796875, 26.822280883789062, 44.3276252746582, 70.39220428466797, 21.34259033203125, -6.45111083984375, 101.60272979736328, 27.200836181640625, 15.631240844726562, 35.987823486328125, 30.15850830078125, 25.941238403320312, 21.43699836730957, 113.25706481933594, 70.72862243652344, 56.690223693847656, 55.54195022583008, 93.03227233886719, 36.12492370605469, 44.007080078125, 16.22570037841797, 63.308189392089844, 2.68658447265625, 69.59013366699219, 4.9563446044921875, 48.00331115722656, 42.44792175292969, 58.34016036987305, 32.61220169067383, 112.97628784179688, 22.308563232421875, 64.79576873779297, 133.8654022216797, 111.51055908203125, 42.65809631347656, 38.566917419433594, 38.842559814453125, 21.307144165039062, 30.41885757446289, 70.54997253417969, 48.017120361328125, 2.334430694580078, 53.296234130859375, 67.71184539794922, 44.64720153808594, -21.234203338623047, 63.901466369628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000589.npy"} +{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 38.70075607299805, "std": 28.155410766601562, "min": -14.5260009765625, "p10": 8.757490539550782, "median": 35.202884674072266, "p90": 84.04414978027344, "max": 111.41190338134766, "pos_frac": 0.96875, "sample": [8.221893310546875, 111.41190338134766, 64.25003051757812, 39.379085540771484, 21.42511749267578, 9.929115295410156, 42.70819091796875, 10.52471923828125, 67.2286605834961, 22.96160888671875, 10.387163162231445, 7.060420989990234, 14.96533203125, 72.96366882324219, 82.91470336914062, 23.505496978759766, 13.35396957397461, -6.042877197265625, 34.79646301269531, 37.27685546875, 36.86996078491211, 18.35301971435547, 17.451614379882812, 34.45115661621094, 16.706764221191406, 9.335319519042969, 62.095436096191406, 39.471290588378906, 87.42034912109375, 7.980556488037109, -14.5260009765625, 57.362274169921875, 30.112918853759766, 89.26422119140625, 42.15789794921875, 59.313621520996094, 41.35194396972656, 54.77593994140625, 19.237224578857422, 14.69588851928711, 20.05294418334961, 38.724727630615234, 15.455106735229492, 5.3988800048828125, 26.923919677734375, 13.823806762695312, 32.8624267578125, 8.509849548339844, 20.894073486328125, 86.38883209228516, 110.30488586425781, 56.505882263183594, 35.3177490234375, 45.43733215332031, 35.753204345703125, 58.23271179199219, 39.83876037597656, 87.4273681640625, 59.57818603515625, 84.5281982421875, 62.08296203613281, 35.08802032470703, 17.481914520263672, 67.13384246826172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000590.npy"} +{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 36.54948425292969, "std": 25.83894157409668, "min": -17.789215087890625, "p10": 5.315490722656251, "median": 38.88365936279297, "p90": 66.78363342285158, "max": 95.29208374023438, "pos_frac": 0.90625, "sample": [6.39129638671875, 95.29208374023438, 84.81503295898438, 55.43115997314453, 70.0567626953125, 26.330825805664062, 49.006935119628906, 15.801704406738281, 33.060707092285156, 85.97422790527344, 25.03851318359375, 35.76232147216797, 55.63414764404297, 22.238258361816406, 50.901611328125, 60.32355499267578, 63.51764678955078, 55.27734375, 47.59584045410156, 47.78538513183594, 18.001708984375, 56.901092529296875, -1.709207534790039, 13.482986450195312, 52.47785949707031, 18.337379455566406, 13.904451370239258, 63.0736083984375, 50.69691467285156, 24.352340698242188, -15.10307502746582, 39.181793212890625, 57.2507438659668, -10.60150146484375, 19.24592399597168, 21.332191467285156, 4.85443115234375, 11.402250289916992, 58.51176452636719, 10.88502311706543, -17.789215087890625, 53.01171112060547, 51.03097915649414, 54.178932189941406, 14.622276306152344, 48.3646240234375, 69.56891632080078, 13.379589080810547, 29.175960540771484, 38.58552551269531, 68.18334197998047, 40.022926330566406, 44.210899353027344, -1.699859619140625, 62.73553466796875, 36.26154327392578, 73.9940185546875, -13.568260192871094, 40.77324676513672, 25.023704528808594, 13.682907104492188, 9.346000671386719, 58.35105895996094, 35.006500244140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000591.npy"} +{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 35.76453399658203, "std": 29.050722122192383, "min": -38.77819061279297, "p10": -1.3484394073486323, "median": 41.81907272338867, "p90": 66.41814804077148, "max": 121.98455810546875, "pos_frac": 0.859375, "sample": [64.75821685791016, 76.87496948242188, 9.557584762573242, 56.89288330078125, -1.5548553466796875, -0.45538330078125, 42.43981170654297, 43.75212097167969, 83.78643798828125, 52.78429412841797, 20.378448486328125, 49.50031280517578, -0.8668022155761719, 43.25944519042969, -13.614044189453125, 32.96418380737305, 66.95012664794922, 43.74791717529297, 46.254539489746094, 121.98455810546875, 32.81481170654297, 24.934680938720703, 13.849647521972656, 41.198333740234375, -5.381704330444336, 6.843019485473633, 15.545291900634766, 75.4185791015625, 39.02894973754883, 60.8376350402832, 15.27164077758789, 7.574855804443359, 30.815597534179688, 47.78053283691406, 69.74111938476562, 5.932365417480469, 45.93196105957031, 27.304603576660156, 6.5628204345703125, 65.17686462402344, 54.9718017578125, 30.638755798339844, 53.12439727783203, 41.162200927734375, 12.413330078125, -20.840179443359375, 31.966964721679688, -29.53992462158203, 53.02655029296875, 62.747215270996094, 54.11039733886719, 48.10215759277344, 47.398216247558594, 45.20733642578125, -11.004467010498047, 57.899864196777344, 35.15742492675781, 31.00402069091797, 75.4734878540039, 44.37406921386719, -38.77819061279297, 48.2935791015625, 39.37806701660156, 56.06682586669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000592.npy"} +{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 29.934890747070312, "std": 28.498645782470703, "min": -28.137195587158203, "p10": -5.1270341873168945, "median": 27.45668125152588, "p90": 65.5358154296875, "max": 103.80619812011719, "pos_frac": 0.828125, "sample": [1.3456954956054688, -2.57366943359375, -1.419647216796875, -10.355865478515625, 15.218109130859375, 21.035751342773438, 39.83082580566406, 13.44961929321289, 54.802330017089844, 63.202362060546875, 24.54869842529297, 71.77754211425781, 24.3436279296875, 5.818572998046875, 8.024978637695312, 13.686960220336914, 103.2721939086914, 66.45137023925781, 38.54736328125, 35.585811614990234, 48.612613677978516, 28.811630249023438, 33.14155578613281, 25.89777374267578, 38.17581558227539, 103.80619812011719, 28.81653594970703, 47.845916748046875, 53.13917541503906, 12.678956985473633, -4.847675323486328, 60.900203704833984, 9.479696273803711, -2.9861793518066406, 36.22193145751953, 87.58817291259766, 9.161163330078125, 44.34138488769531, 9.442649841308594, 14.740299224853516, -9.191360473632812, 56.17158508300781, -24.02617645263672, 30.772491455078125, 26.10173225402832, 17.616485595703125, 16.993654251098633, 44.527740478515625, 73.68574523925781, 46.65681457519531, 14.4139404296875, 25.813926696777344, 72.021240234375, 53.26123809814453, 48.50177764892578, 37.087867736816406, 63.39952087402344, -5.246759414672852, -7.524370193481445, 34.41278839111328, -9.062324523925781, -28.137195587158203, 22.177989959716797, 43.8442268371582], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000593.npy"} +{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 39.89767837524414, "std": 35.56900405883789, "min": -43.52124786376953, "p10": 2.287495040893556, "median": 32.52593231201172, "p90": 93.06329956054691, "max": 123.5042724609375, "pos_frac": 0.921875, "sample": [75.03778076171875, 1.772857666015625, 4.047513961791992, 1.4643611907958984, 11.004631042480469, 22.725868225097656, 96.6720199584961, 26.496700286865234, 24.213275909423828, 3.4883155822753906, 26.333560943603516, 78.46131134033203, 83.70964050292969, 69.49751281738281, 27.34078598022461, 20.3016357421875, 67.83544921875, 34.80656433105469, 5.3207550048828125, -14.489139556884766, 46.617103576660156, 22.477508544921875, -0.8627166748046875, 84.64295196533203, 11.98086929321289, 65.56695556640625, -12.347015380859375, 55.588844299316406, 57.15023422241211, 37.75062561035156, 15.401412963867188, 56.6746826171875, 52.221099853515625, 47.254417419433594, 67.27723693847656, 49.00244903564453, 26.797527313232422, -1.2255439758300781, 32.926780700683594, -43.52124786376953, 7.873989105224609, 99.28671264648438, 37.017730712890625, 58.29230499267578, 32.125083923339844, 96.780517578125, 81.11680603027344, 59.402130126953125, 30.841781616210938, 28.149295806884766, 12.581169128417969, 5.9188079833984375, 36.75345230102539, 10.252922058105469, 108.80105590820312, 5.527591705322266, 46.12986755371094, 114.28829956054688, 118.8553466796875, 24.68657875061035, 123.5042724609375, 9.117067337036133, 15.959487915039062, 52.773643493652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000594.npy"} +{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 32.23430633544922, "std": 27.126323699951172, "min": -6.165813446044922, "p10": -0.36096076965332013, "median": 26.151134490966797, "p90": 69.54967041015627, "max": 124.7703857421875, "pos_frac": 0.875, "sample": [-3.4425621032714844, 28.971282958984375, 22.557048797607422, 27.135793685913086, -2.109973907470703, 70.70005798339844, 15.703813552856445, 56.83185577392578, -0.6159172058105469, 32.40089416503906, 24.448287963867188, 25.418087005615234, -0.1694049835205078, 66.86543273925781, 4.416664123535156, 48.03999328613281, 24.69322395324707, 124.7703857421875, 46.25336456298828, 23.683597564697266, 11.403865814208984, 17.738300323486328, 26.581687927246094, 20.04094696044922, 11.660598754882812, 13.03708267211914, 20.29971694946289, 28.026391983032227, 32.04512023925781, 54.00879669189453, -0.4430561065673828, 34.792579650878906, 39.55809783935547, 25.7205810546875, 72.29046630859375, 47.07508087158203, 25.022552490234375, 39.78791427612305, 9.836509704589844, 64.21726989746094, 15.310333251953125, 37.44926834106445, 39.4287109375, -3.7113418579101562, -1.1514739990234375, 38.51613235473633, 20.539691925048828, 4.006462097167969, 34.82410430908203, 64.25530242919922, 11.039703369140625, 98.2340087890625, -6.165813446044922, 34.93373107910156, 23.27634048461914, 35.88240051269531, 44.31049346923828, 14.375396728515625, 15.087448120117188, 2.2027854919433594, 77.02146911621094, 54.8012809753418, 89.0314712524414, 90.245361328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000595.npy"} +{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 34.995357513427734, "std": 30.359037399291992, "min": -25.180879592895508, "p10": -1.2824281692504873, "median": 32.723670959472656, "p90": 77.81425094604492, "max": 111.18344116210938, "pos_frac": 0.875, "sample": [-7.119140625, -1.7886962890625, 60.328006744384766, -5.627403259277344, 27.89263916015625, 77.59355163574219, 2.2541885375976562, 69.2437744140625, 32.938720703125, -0.25730323791503906, 77.9088363647461, 62.866912841796875, 13.130752563476562, 62.09300994873047, 53.86530303955078, 82.1135025024414, 29.42436981201172, 17.621280670166016, 57.17272186279297, 76.73582458496094, 2.259571075439453, 81.4970703125, 23.63863754272461, 25.66356658935547, 26.18860626220703, 106.83212280273438, -3.992433547973633, 34.061431884765625, 48.09088897705078, 5.174476623535156, 35.19280242919922, 20.813669204711914, 11.648277282714844, 88.77093505859375, 7.337747573852539, 15.774505615234375, 64.7754135131836, 43.82014846801758, 54.21266174316406, -1.7217674255371094, 39.040199279785156, 7.426664352416992, 43.82605743408203, 22.984939575195312, 36.354644775390625, 72.80015563964844, 33.48731994628906, 25.479080200195312, 32.50862121582031, 40.89616394042969, 11.468246459960938, 5.546442031860352, -8.369792938232422, 57.858612060546875, 12.253952026367188, -25.180879592895508, 35.75738525390625, 16.93536376953125, 42.448875427246094, 111.18344116210938, 82.29814147949219, 2.1825618743896484, 46.120826721191406, 15.966764450073242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000596.npy"} +{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 35.32228469848633, "std": 31.88015365600586, "min": -21.089923858642578, "p10": -2.174057388305664, "median": 29.974140167236328, "p90": 75.38216476440431, "max": 119.52737426757812, "pos_frac": 0.859375, "sample": [25.47442626953125, 33.90740203857422, 29.605453491210938, 41.71091079711914, 39.3201904296875, 119.52737426757812, 6.170093536376953, 48.135887145996094, 21.46938705444336, 31.4545955657959, 11.538360595703125, 20.669715881347656, 9.842185974121094, -1.9275550842285156, 30.274459838867188, -4.025276184082031, 26.206363677978516, 36.244873046875, 52.851165771484375, 96.99240112304688, 16.427217483520508, 20.461257934570312, 9.924331665039062, 59.383697509765625, 70.7820053100586, 12.834342956542969, -4.6646270751953125, 4.806205749511719, 60.8980712890625, 11.119491577148438, -13.413780212402344, -16.493207931518555, 46.943511962890625, 22.75897216796875, 29.67382049560547, 79.05339050292969, 35.88275146484375, -1.5731887817382812, -12.469465255737305, 21.716407775878906, 71.54598999023438, 65.67143249511719, 2.910430908203125, 71.3884048461914, 3.9452896118164062, 51.63746643066406, 44.764713287353516, 58.309906005859375, 28.34368896484375, 76.85539245605469, 71.94463348388672, -2.2797012329101562, -21.089923858642578, 109.94485473632812, 63.8565673828125, 7.352714538574219, 88.30950164794922, 20.934906005859375, 63.128814697265625, 68.98904418945312, 84.91926574707031, 30.93193817138672, 14.829055786132812, 53.98817443847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000597.npy"} +{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 32.456298828125, "std": 25.231464385986328, "min": -33.41507339477539, "p10": 7.427731132507325, "median": 32.41542434692383, "p90": 62.44279937744141, "max": 94.62224578857422, "pos_frac": 0.921875, "sample": [66.09819030761719, 80.36026000976562, 16.079120635986328, 55.319068908691406, 13.653030395507812, 60.879730224609375, 52.30989074707031, 54.256248474121094, 9.451099395751953, 20.74938201904297, 18.884727478027344, -6.449008941650391, -33.41507339477539, 36.012306213378906, 56.76158142089844, -9.683610916137695, 53.79810333251953, 22.266677856445312, 45.64982986450195, 20.670835494995117, 14.002296447753906, 33.179237365722656, 81.43327331542969, 30.145549774169922, 50.75498962402344, 35.760902404785156, 79.83354187011719, 33.72811508178711, -15.586051940917969, 7.156637191772461, 9.74078369140625, 37.71784210205078, 13.389150619506836, 15.3475341796875, 8.060283660888672, 50.09294509887695, 15.356563568115234, 35.008941650390625, 15.961074829101562, 40.03448486328125, 21.908836364746094, 28.89270782470703, 2.6153793334960938, 61.072845458984375, -7.832820892333984, 57.27449035644531, 35.33148956298828, 10.033731460571289, 14.563837051391602, 41.19251251220703, 18.35118865966797, 31.651611328125, 94.62224578857422, 51.79002380371094, 36.160614013671875, 9.105661392211914, 53.2237548828125, 63.02992248535156, 54.77180480957031, 41.6988525390625, 28.21075439453125, 17.784536361694336, 67.13439178466797, 19.84427261352539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000598.npy"} +{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 32.661949157714844, "std": 25.800249099731445, "min": -13.721504211425781, "p10": -0.809971046447752, "median": 31.273252487182617, "p90": 69.87661743164062, "max": 83.75395202636719, "pos_frac": 0.890625, "sample": [48.22063446044922, 1.801483154296875, 26.041168212890625, -8.793586730957031, 21.749229431152344, 79.41790771484375, 54.93902587890625, 35.2022705078125, 50.408180236816406, 28.297470092773438, 71.11836242675781, 71.59242248535156, 6.569616317749023, 80.29075622558594, 1.111825942993164, 26.072158813476562, 83.75395202636719, 34.53150177001953, 18.586700439453125, 32.668212890625, 15.045318603515625, 48.91089630126953, 44.49285125732422, -5.803958892822266, 39.610443115234375, 4.094911575317383, 32.2958984375, 52.09524917602539, 1.9711265563964844, 25.677452087402344, 69.10394287109375, -9.970169067382812, 51.249961853027344, 47.82270812988281, 41.70024108886719, 83.1680679321289, 53.58038330078125, 19.331214904785156, 61.11042022705078, 10.781166076660156, 55.07611846923828, 17.753082275390625, 70.207763671875, 51.67856216430664, 28.692916870117188, 14.531471252441406, 52.851661682128906, -13.721504211425781, 45.032936096191406, 21.895736694335938, 23.5828857421875, 3.3974132537841797, -3.7478103637695312, -1.6335983276367188, 17.38592529296875, 39.499298095703125, 47.81296157836914, 22.923492431640625, 45.492958068847656, 66.05470275878906, 4.2212982177734375, -8.466251373291016, 30.250606536865234, 9.744739532470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000599.npy"} +{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 30.076370239257812, "std": 25.40629768371582, "min": -29.105804443359375, "p10": 6.32385540008545, "median": 27.786548614501953, "p90": 68.98934631347657, "max": 103.53346252441406, "pos_frac": 0.921875, "sample": [24.7930850982666, 12.828899383544922, 37.0179443359375, -24.881027221679688, 55.80451202392578, 13.215133666992188, 30.103878021240234, 12.218246459960938, 7.007081985473633, 54.51679229736328, 17.477012634277344, 25.83094024658203, 41.32173156738281, 11.620641708374023, 38.20699691772461, 74.97270202636719, 11.88101577758789, 18.06958770751953, 15.242504119873047, 11.446224212646484, 52.381439208984375, 75.38541412353516, 18.486892700195312, 6.027929306030273, 18.60296630859375, 36.5919189453125, 21.04686737060547, 103.53346252441406, 81.63162231445312, 30.656848907470703, 19.05796241760254, 64.92825317382812, 70.1064453125, 30.4520206451416, 66.38278198242188, 6.031044006347656, -12.89874267578125, 30.9161376953125, -9.136819839477539, 35.870849609375, 41.83038330078125, 42.85975646972656, 20.75341796875, 23.981491088867188, 55.71528625488281, 16.107765197753906, 35.25088882446289, 34.0533561706543, 10.183197021484375, 41.54798889160156, 16.957847595214844, 41.56836700439453, 74.96381378173828, 42.834869384765625, 36.37432098388672, 73.07698059082031, -29.105804443359375, 29.742156982421875, 14.749471664428711, -2.4287185668945312, 17.37078857421875, 10.731834411621094, 10.60799789428711, 30.410940170288086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000600.npy"} +{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 34.12665939331055, "std": 29.32499885559082, "min": -24.4970703125, "p10": -1.2176856994628893, "median": 30.437349319458008, "p90": 69.75142822265626, "max": 114.76986694335938, "pos_frac": 0.890625, "sample": [3.7620620727539062, 18.405471801757812, 114.76986694335938, 62.29195785522461, 9.011078834533691, 64.53580474853516, 33.12590026855469, 27.053680419921875, 30.752925872802734, 55.864845275878906, 24.488365173339844, 9.279434204101562, 67.53414916992188, 24.070724487304688, 50.843292236328125, 27.293006896972656, 20.29453468322754, 55.81732177734375, 24.119972229003906, 61.759788513183594, 27.252792358398438, 72.23069763183594, -24.4970703125, 29.84466552734375, 27.712120056152344, 53.28388595581055, 1.2150020599365234, 37.1907958984375, 62.643775939941406, 34.031105041503906, 7.947715759277344, 38.76466369628906, -4.712127685546875, 8.07968521118164, 26.723220825195312, 70.70169067382812, 13.929862976074219, 72.15985870361328, 31.033294677734375, 25.117538452148438, 0.187286376953125, 30.12177276611328, -1.8198165893554688, -6.8827362060546875, 26.891143798828125, -16.54458999633789, 2.422515869140625, 51.48710632324219, 62.754249572753906, -13.326667785644531, 39.43742370605469, 76.78056335449219, 34.82609558105469, 43.80656433105469, -20.675296783447266, 108.00767517089844, 28.623350143432617, 40.60894775390625, 37.1129150390625, 60.066192626953125, 96.43248748779297, 11.063934326171875, 54.741424560546875, 42.256317138671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000601.npy"} +{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 37.70860290527344, "std": 28.099163055419922, "min": -14.713577270507812, "p10": 2.2413087844848634, "median": 36.126731872558594, "p90": 75.73837509155275, "max": 131.0216522216797, "pos_frac": 0.921875, "sample": [44.640533447265625, 15.379325866699219, -1.97296142578125, 8.036182403564453, 28.100187301635742, 41.58158874511719, 29.48340606689453, 60.094696044921875, 2.2260589599609375, 25.73461151123047, 54.045921325683594, 37.07283020019531, 21.542436599731445, 30.61760711669922, 82.02513122558594, 20.02304458618164, 71.76248168945312, 68.88758850097656, 77.44232940673828, 39.25389099121094, 30.891510009765625, 19.295368194580078, -4.9249267578125, 7.791191101074219, 2.2141590118408203, 56.823394775390625, 53.47917938232422, 10.359630584716797, -1.860198974609375, 2.2768917083740234, 8.407066345214844, 33.50921630859375, 53.038665771484375, 23.080501556396484, 40.02393341064453, 68.4837646484375, 12.195693969726562, 38.00135040283203, 26.550086975097656, 4.717578887939453, 94.20051574707031, 56.78590393066406, 81.21250915527344, 35.180633544921875, 34.93971252441406, 79.08866882324219, 131.0216522216797, -11.6102294921875, 47.81700897216797, 18.87921142578125, 63.04985809326172, 49.59544372558594, 57.50450897216797, 43.140159606933594, 61.7091064453125, 39.68592834472656, -14.713577270507812, 17.125877380371094, 32.76074981689453, 39.50608825683594, 63.297119140625, 78.91370391845703, 43.73876953125, 30.190231323242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000602.npy"} +{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 32.04313659667969, "std": 26.283693313598633, "min": -14.387336730957031, "p10": 0.5817659378051763, "median": 32.51904296875, "p90": 66.30594940185547, "max": 95.30891418457031, "pos_frac": 0.90625, "sample": [9.16522216796875, 25.37274742126465, 43.57965087890625, 1.0159320831298828, 95.30891418457031, 18.660430908203125, 62.46348571777344, 60.211219787597656, 23.506179809570312, 66.71331787109375, 35.49022674560547, 43.235939025878906, -11.655136108398438, 74.65619659423828, 9.600906372070312, 38.352508544921875, 18.263275146484375, 56.30403137207031, 14.1605224609375, 37.90293884277344, 65.19720458984375, 26.601036071777344, 35.784400939941406, 12.023475646972656, 21.869842529296875, 38.30217742919922, 69.68780517578125, 36.430545806884766, 44.50645446777344, 25.53313446044922, 38.34599685668945, -11.371570587158203, 48.13377380371094, -14.387336730957031, 24.904518127441406, 65.35542297363281, 84.30320739746094, 1.5115509033203125, 0.3956947326660156, 19.810333251953125, 44.644622802734375, 39.83153533935547, 1.5325241088867188, 8.366024017333984, -2.3570213317871094, 24.408340454101562, 8.576797485351562, 14.399377822875977, 4.5867919921875, 49.10345458984375, -11.112096786499023, 52.442176818847656, 29.54785919189453, 10.4888916015625, 42.49042892456055, -4.9643402099609375, 48.6978759765625, 48.891456604003906, 57.994407653808594, 13.586669921875, 76.34245300292969, 46.7260627746582, 84.30973815917969, 6.980587005615234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000603.npy"} +{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 35.393802642822266, "std": 29.887229919433594, "min": -26.029582977294922, "p10": 2.5117891311645537, "median": 32.554237365722656, "p90": 71.26539535522463, "max": 118.86788940429688, "pos_frac": 0.90625, "sample": [18.201574325561523, 9.363059997558594, 27.09471893310547, 1.2892608642578125, 67.32255554199219, 44.06249237060547, 50.38340759277344, 28.250320434570312, 8.351783752441406, 15.190681457519531, -7.095367431640625, 72.48963165283203, 17.05475616455078, -6.930931091308594, 61.44618225097656, 26.456817626953125, 35.157989501953125, 5.364355087280273, 49.31262969970703, 63.92451477050781, 34.42412185668945, -0.9449005126953125, 6.431800842285156, 12.267173767089844, -7.538320541381836, -26.029582977294922, 36.16417694091797, 8.985164642333984, 64.0881118774414, 13.1942138671875, 9.757240295410156, 29.403839111328125, 18.88855743408203, 42.03437042236328, 49.21574783325195, 68.40884399414062, 26.110137939453125, 7.940803527832031, 80.50869750976562, 14.0552978515625, 37.141727447509766, 32.06212615966797, 68.2630386352539, -21.639434814453125, 74.66512298583984, 84.9442138671875, 53.589256286621094, 118.86788940429688, 39.08287811279297, 36.917449951171875, 39.16064453125, 47.866859436035156, 113.63299560546875, 28.267833709716797, 50.208885192871094, 46.81093215942383, 30.402793884277344, 13.862548828125, 59.064273834228516, 31.85382843017578, 33.046348571777344, 29.713546752929688, 106.64500427246094, 36.71664810180664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000604.npy"} +{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 31.686960220336914, "std": 30.38180923461914, "min": -27.89897918701172, "p10": 3.4804565429687506, "median": 26.019160270690918, "p90": 69.82191925048829, "max": 156.74188232421875, "pos_frac": 0.953125, "sample": [19.56053924560547, 28.291046142578125, 8.016876220703125, 23.27449607849121, 44.583961486816406, 25.091598510742188, 18.155534744262695, 67.01567077636719, 75.16107940673828, 17.047443389892578, 35.988525390625, 13.713165283203125, 10.278837203979492, 20.52655792236328, 29.047046661376953, 57.52874755859375, 46.29804229736328, 81.283203125, 71.02459716796875, 39.923255920410156, 156.74188232421875, 16.066604614257812, 3.1842193603515625, 48.86293029785156, 31.897377014160156, 13.678329467773438, 39.133209228515625, 11.115287780761719, -27.89897918701172, 2.4475021362304688, 18.596328735351562, 2.1431846618652344, 44.776145935058594, -24.780487060546875, 2.968372344970703, 20.436004638671875, 108.19694519042969, 31.447050094604492, 4.1716766357421875, 95.15560913085938, 32.253360748291016, 102.61080932617188, 20.294790267944336, 7.534526824951172, 23.668701171875, 26.94672203063965, 11.97274398803711, 15.85776138305664, 7.277580261230469, 19.430246353149414, 20.223915100097656, 41.36688232421875, 27.221824645996094, 51.69105529785156, -5.111490249633789, 23.084672927856445, 46.752235412597656, 8.882259368896484, 36.5297966003418, 27.305709838867188, 28.52825927734375, 32.94408416748047, 45.090049743652344, 45.45953369140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000605.npy"} +{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 36.67774963378906, "std": 26.492900848388672, "min": -3.8375320434570312, "p10": 5.680474472045899, "median": 36.77361488342285, "p90": 70.83138427734376, "max": 123.25448608398438, "pos_frac": 0.96875, "sample": [-0.9348793029785156, 4.040679931640625, 52.04517364501953, 10.99897575378418, 59.847572326660156, 56.28143310546875, 9.31039047241211, 14.160629272460938, 69.81092834472656, 66.04307556152344, 26.61341094970703, 123.25448608398438, 39.006988525390625, -3.8375320434570312, 15.7664794921875, 42.75115966796875, 14.253547668457031, 5.547344207763672, 66.71092224121094, 53.56865692138672, 50.37080001831055, 15.32927131652832, 50.10862731933594, 0.59637451171875, 77.46087646484375, 89.67227172851562, 13.256988525390625, 9.966033935546875, 13.747777938842773, 31.857284545898438, 14.542404174804688, 31.476112365722656, 13.129789352416992, 12.553787231445312, 1.0646743774414062, 4.114189147949219, 41.77317810058594, 63.11565399169922, 6.965982437133789, 9.656890869140625, 39.81366729736328, 50.514686584472656, 71.74560546875, 51.971290588378906, 47.96429443359375, 50.7138671875, 62.03748321533203, 44.84642028808594, 71.26872253417969, 20.284954071044922, 23.230667114257812, 54.918907165527344, 21.031112670898438, 42.54792785644531, 34.54024124145508, 65.9830322265625, 39.5592041015625, 30.088584899902344, 25.055908203125, 78.061767578125, 72.41902160644531, 20.336212158203125, 46.45271301269531, 5.991111755371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000606.npy"} +{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 32.44065856933594, "std": 23.698347091674805, "min": -10.40713119506836, "p10": 4.845211219787598, "median": 31.25627326965332, "p90": 61.344248962402354, "max": 100.00485229492188, "pos_frac": 0.953125, "sample": [58.00492858886719, 70.0317611694336, 50.49311828613281, 11.095037460327148, 40.17594909667969, 32.71983337402344, 24.830184936523438, 100.00485229492188, 17.063697814941406, 37.17278289794922, -10.40713119506836, 37.267356872558594, 31.064620971679688, 7.130195617675781, 33.8835563659668, 96.81060791015625, 7.957256317138672, 28.92887306213379, 18.52685546875, 22.292085647583008, 10.387344360351562, 4.813295364379883, 54.296485900878906, 19.586883544921875, 4.919681549072266, 51.63328552246094, 4.180694580078125, 31.447925567626953, 14.96929931640625, 37.26868438720703, 44.84303283691406, 30.570159912109375, 70.5453109741211, 32.247802734375, 51.126991271972656, 41.910545349121094, 27.88409423828125, 2.506744384765625, 44.55385971069336, 39.9028434753418, 12.726791381835938, 74.45989990234375, 4.30401611328125, 62.039207458496094, 16.06249237060547, 10.725566864013672, 13.556037902832031, 40.738059997558594, 50.88667297363281, 53.59880828857422, 19.95196533203125, 44.506805419921875, 30.103378295898438, 32.46833038330078, -7.7268829345703125, 19.982025146484375, 52.47504425048828, 74.41314697265625, 10.103607177734375, 37.510406494140625, 11.599140167236328, -6.108325958251953, 59.722679138183594, 23.461843490600586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000607.npy"} +{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 33.237815856933594, "std": 26.6564884185791, "min": -25.257266998291016, "p10": 0.7183568954467783, "median": 33.07780075073242, "p90": 73.3316246032715, "max": 85.30990600585938, "pos_frac": 0.90625, "sample": [83.35868072509766, 24.413429260253906, 14.718513488769531, 1.5874595642089844, 29.019371032714844, 56.55963897705078, 28.96075439453125, 35.144466400146484, 46.6161003112793, 51.90650939941406, 55.516578674316406, 0.3458843231201172, 34.72418975830078, 39.30885314941406, -4.1858062744140625, 16.585805892944336, -5.152923583984375, 59.8511962890625, 8.957832336425781, 40.57539367675781, 71.15837860107422, 38.059600830078125, 20.397491455078125, 19.152015686035156, -15.659149169921875, 19.757518768310547, 18.52294921875, 41.195579528808594, 67.05799865722656, 4.919303894042969, 50.03671646118164, 6.6072540283203125, 53.33872985839844, 5.869499206542969, 30.44317626953125, 41.3280029296875, 19.418807983398438, -20.204010009765625, 41.354888916015625, -2.2163848876953125, 75.81419372558594, 42.19859313964844, 85.30990600585938, 36.952545166015625, 14.750541687011719, 16.732376098632812, 6.4394989013671875, 34.12297058105469, 47.72673797607422, 9.999271392822266, 57.40034484863281, 12.280731201171875, 27.405685424804688, 61.48651123046875, -25.257266998291016, 29.142578125, 17.291828155517578, 49.875396728515625, 32.032630920410156, 83.8514404296875, 74.26301574707031, 76.68572998046875, 52.752410888671875, 78.61231231689453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000608.npy"} +{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 33.63862228393555, "std": 23.901885986328125, "min": -16.501113891601562, "p10": 3.3186065673828145, "median": 33.712440490722656, "p90": 63.36136856079102, "max": 101.17274475097656, "pos_frac": 0.953125, "sample": [65.38839721679688, 21.552032470703125, 11.884008407592773, 21.286453247070312, 26.67548370361328, 67.64089965820312, 50.613037109375, 24.247421264648438, 37.821231842041016, 20.462692260742188, 46.89109420776367, 57.52342224121094, 8.304183959960938, 26.33224868774414, 34.98974609375, -16.501113891601562, 2.51654052734375, 38.16802978515625, 42.86864471435547, 22.21167755126953, 18.79693603515625, 38.61785888671875, 39.83007049560547, 59.572303771972656, 58.73283386230469, 52.87725067138672, 62.478736877441406, 12.281494140625, 30.51443099975586, 50.821800231933594, 34.40215301513672, 17.011436462402344, 48.304290771484375, 51.473426818847656, 81.98455810546875, 63.73963928222656, -8.117897033691406, 33.09442901611328, 36.865936279296875, 54.47553634643555, 10.709630966186523, 42.47761535644531, 32.95332336425781, 101.17274475097656, 45.649688720703125, 61.48307800292969, 80.72802734375, 1.7581367492675781, 6.944231033325195, 8.057281494140625, 49.53044891357422, 19.87883758544922, -5.767326354980469, 19.504486083984375, 14.754646301269531, 1.9644699096679688, 24.77252960205078, 13.387237548828125, 34.33045196533203, 71.09063720703125, 1.812582015991211, 24.231094360351562, 37.62451934814453, 5.190093994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000609.npy"} +{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 32.680015563964844, "std": 25.850717544555664, "min": -22.056419372558594, "p10": 1.468102264404298, "median": 34.61331558227539, "p90": 67.08309173583984, "max": 92.38442993164062, "pos_frac": 0.921875, "sample": [24.71172523498535, 27.482147216796875, 38.3306884765625, 36.697391510009766, 8.186088562011719, 50.80116271972656, 13.73175048828125, 12.564987182617188, 25.644248962402344, 39.4141845703125, 0.9601974487304688, 58.85346984863281, 2.6532135009765625, 75.82504272460938, 89.6973876953125, 41.18206787109375, 32.690185546875, 31.919836044311523, -9.775291442871094, 15.997665405273438, 33.84028625488281, 35.38634490966797, 5.074302673339844, 43.96769714355469, 15.72113037109375, 19.77678680419922, -7.402858734130859, 5.03375244140625, 78.32209777832031, 92.38442993164062, 79.51089477539062, 6.416343688964844, 0.8397312164306641, 70.83443450927734, 46.887420654296875, 41.327911376953125, 35.63355255126953, 43.87162780761719, 41.644142150878906, 25.245555877685547, 42.76097106933594, 52.6475830078125, -2.361766815185547, 18.290367126464844, 67.19558715820312, 24.850425720214844, 57.88819885253906, 53.328922271728516, 2.7772789001464844, 48.954071044921875, 8.955825805664062, 66.82060241699219, 16.80496597290039, 3.7688636779785156, 40.95927429199219, 47.678550720214844, 54.35896301269531, 38.98234558105469, 10.604085922241211, 49.830718994140625, -22.056419372558594, -11.974624633789062, 62.248291015625, 26.324295043945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000610.npy"} +{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 35.87771224975586, "std": 28.05087661743164, "min": -14.700069427490234, "p10": 2.023128890991211, "median": 32.880510330200195, "p90": 73.9546356201172, "max": 96.7728042602539, "pos_frac": 0.9375, "sample": [15.946197509765625, 39.29905700683594, 68.1657485961914, 69.05841064453125, 62.39240264892578, 33.12559509277344, 20.1534423828125, 35.36476135253906, 37.25591278076172, -1.5589218139648438, 31.870826721191406, 71.76959228515625, -14.700069427490234, 4.778102874755859, -6.546588897705078, 2.2825145721435547, 4.7021942138671875, 20.777084350585938, 47.780479431152344, 90.0774917602539, 3.413055419921875, 93.99890899658203, 67.01332092285156, 3.1324920654296875, 42.15671920776367, 27.161102294921875, 81.47016143798828, 74.89108276367188, 19.001426696777344, 52.68736267089844, 39.61650085449219, 22.532180786132812, 47.903038024902344, 10.066766738891602, 25.199668884277344, 22.44493865966797, 0.7492790222167969, 19.012636184692383, 3.200794219970703, 46.81817626953125, 76.61491394042969, 47.0758056640625, 50.81138610839844, 52.800132751464844, 43.64842224121094, 48.13402557373047, 14.523473739624023, 32.63542556762695, 60.92427444458008, 13.218017578125, 30.209247589111328, 1.3465805053710938, 63.82648468017578, 62.090301513671875, -0.9665908813476562, 19.991600036621094, 96.7728042602539, 25.102684020996094, 2.146221160888672, 68.79826354980469, 82.84332275390625, 1.9703750610351562, 53.847076416015625, 13.345630645751953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000611.npy"} +{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 40.946144104003906, "std": 27.59153175354004, "min": -15.923892974853516, "p10": 10.427021789550782, "median": 41.15257453918457, "p90": 80.33059387207031, "max": 104.48534393310547, "pos_frac": 0.9375, "sample": [63.21441650390625, 34.083492279052734, 46.66572189331055, 10.312210083007812, 22.543224334716797, 61.053123474121094, 58.99977111816406, 45.380035400390625, 10.00860595703125, -15.923892974853516, 24.73125457763672, 58.90525817871094, 41.96376037597656, 72.60275268554688, 50.779632568359375, 57.32579803466797, 104.48534393310547, 46.158302307128906, 56.245208740234375, 46.142478942871094, 32.982574462890625, 11.918994903564453, 16.902502059936523, 51.894744873046875, 79.79969787597656, 22.155166625976562, 29.16876220703125, 10.694915771484375, 65.19168090820312, 62.590576171875, 50.28966522216797, 21.6165771484375, 5.624481201171875, 12.23095703125, 80.86015319824219, 30.767719268798828, 33.00041198730469, 33.94642639160156, 68.95770263671875, 20.619449615478516, 18.901235580444336, 27.340007781982422, -2.22747802734375, 41.470375061035156, 85.41304016113281, 15.727333068847656, 96.36502075195312, 40.834774017333984, 92.68037414550781, 14.843620300292969, -5.676689147949219, 60.0617561340332, 80.55812072753906, 40.000404357910156, 11.726531982421875, 17.18000030517578, 63.9185791015625, 50.86015319824219, 16.940773010253906, 49.14671325683594, 103.86885070800781, 42.61309051513672, 23.77243995666504, -2.655670166015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000612.npy"} +{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 35.77855682373047, "std": 27.456396102905273, "min": -12.120407104492188, "p10": 0.35380821228027437, "median": 37.126983642578125, "p90": 62.118386077880864, "max": 121.32936096191406, "pos_frac": 0.890625, "sample": [40.599365234375, 30.945098876953125, 37.78752136230469, 20.993541717529297, 34.12467956542969, 56.586761474609375, 28.161144256591797, 46.895355224609375, 44.50419616699219, 42.397186279296875, 42.842864990234375, -12.120407104492188, 56.36618423461914, -6.540153503417969, 121.32936096191406, 51.62477111816406, 41.971153259277344, 73.14768981933594, 41.92845153808594, 13.087738037109375, 63.09832000732422, 32.78126525878906, 61.792274475097656, 103.502197265625, 58.70806121826172, 32.43409729003906, 50.06599426269531, 73.51201629638672, 14.12701416015625, 24.728809356689453, 7.9625244140625, 23.016372680664062, 24.307634353637695, 24.934974670410156, 7.8839569091796875, 50.573638916015625, 18.356449127197266, 50.61512756347656, 44.98039245605469, -0.048313140869140625, 42.287940979003906, 51.046302795410156, 62.258148193359375, 22.608543395996094, -1.582305908203125, 43.33721160888672, 26.658660888671875, 119.79960632324219, 2.496488571166992, -6.5037841796875, 36.77714538574219, 38.612388610839844, 1.2920913696289062, 18.200702667236328, 50.3818359375, 52.95663070678711, 27.293289184570312, 12.121307373046875, -3.1679000854492188, 21.090744018554688, 6.6322784423828125, 60.384605407714844, 37.47682189941406, -6.5965728759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000613.npy"} +{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 35.7594108581543, "std": 25.567346572875977, "min": -9.156158447265625, "p10": 4.896209716796877, "median": 35.86566925048828, "p90": 67.44098129272462, "max": 98.75542449951172, "pos_frac": 0.953125, "sample": [18.411285400390625, 44.73944854736328, 16.345474243164062, 52.95458984375, 24.204025268554688, 47.126461029052734, 10.373355865478516, 57.83917236328125, 21.559934616088867, 3.1920204162597656, 46.79850769042969, 23.77657699584961, 46.073028564453125, 50.05914306640625, 49.902488708496094, -9.156158447265625, 47.7130126953125, 98.75542449951172, 28.883377075195312, 2.771251678466797, 91.30963134765625, 7.1116943359375, 40.264469146728516, 33.77952194213867, 35.414268493652344, 3.94671630859375, 40.50115966796875, 30.521377563476562, 20.375926971435547, 39.87433624267578, 30.923004150390625, 32.09058380126953, 30.00063705444336, 41.95222854614258, 15.672386169433594, 39.07218933105469, 10.792720794677734, 1.4810962677001953, 36.31707000732422, 52.34282684326172, 18.05907440185547, 9.066158294677734, 41.42561340332031, 44.45405578613281, 65.35164642333984, 90.22541809082031, -9.070411682128906, 31.946502685546875, 39.41743469238281, 7.8668212890625, 16.696815490722656, 10.452695846557617, 44.50128173828125, 41.56214904785156, 95.72602081298828, 68.33641052246094, 41.96112823486328, 16.613557815551758, 41.05585479736328, -1.6690597534179688, 95.87632751464844, 22.04730224609375, 52.227256774902344, 88.40596771240234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000614.npy"} +{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 30.23415184020996, "std": 23.44518280029297, "min": -6.964441299438477, "p10": 3.257430267333985, "median": 24.836122512817383, "p90": 60.99833221435547, "max": 79.88117218017578, "pos_frac": 0.921875, "sample": [2.93682861328125, 67.80497741699219, 29.568342208862305, 7.6898651123046875, 19.222185134887695, 4.005500793457031, -0.1749420166015625, 5.3954315185546875, 24.865966796875, 15.762344360351562, 53.324554443359375, 47.24806213378906, 17.156898498535156, 48.78266143798828, 70.4590072631836, 16.667572021484375, 13.763092041015625, 58.804969787597656, 14.127769470214844, 7.225578308105469, 6.279659271240234, 2.315093994140625, 75.54811096191406, 6.678337097167969, 12.574586868286133, 61.110626220703125, 49.394134521484375, 67.55776977539062, 42.288021087646484, 7.568140029907227, 18.581539154052734, 50.19978332519531, 37.013702392578125, 7.6113128662109375, 23.657634735107422, 47.00914764404297, 54.06941223144531, 44.35154724121094, 23.311294555664062, 60.73631286621094, -6.837516784667969, 54.83122253417969, 12.531997680664062, -6.964441299438477, 30.309356689453125, 32.566856384277344, 35.21397399902344, 69.13798522949219, 20.82757568359375, 56.45296859741211, 51.275211334228516, 10.910150527954102, 37.791481018066406, -3.7553482055664062, 20.275123596191406, 4.33857536315918, 37.18581771850586, 34.25952911376953, 24.806278228759766, 79.88117218017578, -0.9369964599609375, 58.5274658203125, 52.00657272338867, 7.857940673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000615.npy"} +{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 32.59221649169922, "std": 30.33946990966797, "min": -24.325332641601562, "p10": -0.2779418945312494, "median": 30.918426513671875, "p90": 65.9711311340332, "max": 144.134521484375, "pos_frac": 0.890625, "sample": [59.83470916748047, 6.570075988769531, 40.802215576171875, 50.339969635009766, 4.238800048828125, 31.421911239624023, 17.193817138671875, 14.212677001953125, 36.25322723388672, 48.936798095703125, 63.98441696166992, 43.75958251953125, 40.569862365722656, 21.601348876953125, 54.722145080566406, 27.238306045532227, 26.473663330078125, 39.16181182861328, 9.726951599121094, 14.532978057861328, 127.20916748046875, 61.387916564941406, 4.76885986328125, 16.454681396484375, -24.325332641601562, -12.834991455078125, 32.19288635253906, 8.617019653320312, 34.072540283203125, 40.46357727050781, 144.134521484375, 25.040441513061523, -1.0487594604492188, 28.733055114746094, 5.864665985107422, -5.702770233154297, 19.10291862487793, 32.565093994140625, 44.080665588378906, 15.037322998046875, 0.2877960205078125, 78.95707702636719, 15.464859008789062, 73.86991119384766, 31.15685272216797, 86.39889526367188, 32.09467315673828, 20.408279418945312, 29.561569213867188, 30.977493286132812, -0.5204010009765625, 39.76829147338867, 47.52561569213867, -11.095649719238281, 21.46892547607422, 30.859359741210938, 78.97148132324219, 48.67155075073242, 33.12681198120117, 29.807472229003906, 65.17900085449219, 7.7869415283203125, 66.31061553955078, -18.52423858642578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000616.npy"} +{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 29.974029541015625, "std": 23.548133850097656, "min": -11.338577270507812, "p10": 2.3658327102661136, "median": 26.21994113922119, "p90": 62.1591293334961, "max": 100.15568542480469, "pos_frac": 0.953125, "sample": [23.216964721679688, 54.05759048461914, -0.203521728515625, 27.973983764648438, 11.450387954711914, 15.270614624023438, 59.853485107421875, 15.321849822998047, 52.99784851074219, 8.46235466003418, 19.84203338623047, 37.82453155517578, 10.001205444335938, 66.05968475341797, 23.205371856689453, 47.442298889160156, 2.7834911346435547, 31.507293701171875, 14.449562072753906, 100.15568542480469, 25.01390838623047, 12.102340698242188, 91.52320098876953, 53.95537567138672, 10.987834930419922, 34.33928680419922, 33.70446014404297, 14.721782684326172, 42.56318664550781, 70.98912048339844, 27.144350051879883, 28.072683334350586, 11.418563842773438, -7.786582946777344, 52.97537612915039, 57.57518005371094, 42.87220001220703, 37.44462585449219, 41.644439697265625, 41.863868713378906, 13.891918182373047, 26.26721954345703, 35.70759582519531, 60.95610046386719, 64.6641845703125, 49.62477111816406, 1.5422706604003906, -11.338577270507812, 25.0111083984375, 29.799034118652344, 5.591064453125, 1.90472412109375, 2.1868362426757812, 26.17266273498535, 8.952850341796875, 4.0944061279296875, 31.508155822753906, 19.50530242919922, 12.018938064575195, 13.805999755859375, 62.674713134765625, 23.0560302734375, 63.889312744140625, 2.0532760620117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000617.npy"} +{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 30.85983657836914, "std": 26.704391479492188, "min": -13.601818084716797, "p10": -1.3543628692626948, "median": 28.552696228027344, "p90": 68.87801971435547, "max": 105.69377136230469, "pos_frac": 0.875, "sample": [-6.058013916015625, 32.40919494628906, 9.960060119628906, 27.10334014892578, -12.676948547363281, 28.5887451171875, 20.211647033691406, 25.221942901611328, 105.69377136230469, 36.80863952636719, 59.09092712402344, 11.470781326293945, 39.43665313720703, 45.50349426269531, 28.516647338867188, 30.515167236328125, -1.9983596801757812, 71.40288543701172, 9.01861572265625, -1.605621337890625, -0.7680931091308594, 10.021907806396484, 17.759483337402344, 1.0865325927734375, 8.672119140625, 12.756080627441406, 32.65931701660156, 23.172821044921875, 67.18049621582031, 34.99720764160156, 12.416732788085938, 20.133779525756836, 61.363502502441406, 56.403236389160156, 62.386474609375, -13.601818084716797, -2.80682373046875, 29.83075714111328, -6.009527206420898, 32.84539794921875, 28.50353240966797, 6.944587707519531, 76.43429565429688, 2.0938720703125, 38.04081726074219, 3.819122314453125, 38.26806640625, 34.675323486328125, 44.5144157409668, 92.55335998535156, 33.84149932861328, 13.290355682373047, 87.49463653564453, 55.845184326171875, 56.66498565673828, 38.33293533325195, 10.405250549316406, 40.836090087890625, 69.60552978515625, 17.50067138671875, 21.275299072265625, 41.419891357421875, 81.18470764160156, 22.372108459472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000618.npy"} +{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 32.923736572265625, "std": 30.157283782958984, "min": -12.581672668457031, "p10": -1.8260116577148435, "median": 28.75472640991211, "p90": 80.46464385986332, "max": 125.7515869140625, "pos_frac": 0.875, "sample": [-1.7108154296875, 12.819290161132812, 24.2701416015625, 30.86609649658203, 23.868751525878906, 0.7455902099609375, 34.00534439086914, 5.617645263671875, 53.04790496826172, 29.436735153198242, 36.20299530029297, 20.098541259765625, 24.238174438476562, 125.7515869140625, 31.029510498046875, 48.45252990722656, 95.42730712890625, 18.52753257751465, 72.56681823730469, 46.16326904296875, -8.28656005859375, -3.146059036254883, 50.83827209472656, 97.173095703125, 10.514671325683594, -4.099233627319336, 4.873512268066406, 37.33988952636719, 40.808921813964844, 28.383743286132812, -11.870441436767578, 62.194313049316406, 21.74181365966797, 10.142646789550781, 40.139556884765625, 25.945404052734375, 27.261781692504883, 11.404792785644531, 9.665494918823242, 30.250967025756836, 42.76976776123047, 83.84942626953125, 56.49980163574219, 93.7633056640625, 49.904052734375, 25.41185188293457, -12.581672668457031, 91.51716613769531, 68.72433471679688, 9.812667846679688, 3.14996337890625, 17.738475799560547, 32.19129180908203, 86.28274536132812, 2.29595947265625, 15.578004837036133, 54.80291748046875, 10.891979217529297, 57.125999450683594, 29.125709533691406, 51.67201232910156, -5.293182373046875, -1.8753814697265625, 31.060577392578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000619.npy"} +{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 35.61638259887695, "std": 26.59665298461914, "min": 2.2162094116210938, "p10": 6.39809513092041, "median": 29.959335327148438, "p90": 70.06089172363282, "max": 121.81719970703125, "pos_frac": 1.0, "sample": [44.821449279785156, 6.044040679931641, 83.04816436767578, 26.135643005371094, 6.350318908691406, 63.233673095703125, 52.29413986206055, 16.979707717895508, 7.655342102050781, 30.41424560546875, 4.102485656738281, 49.61321258544922, 22.294342041015625, 52.16539764404297, 70.50459289550781, 30.3587646484375, 36.211273193359375, 26.065868377685547, 38.439727783203125, 38.59416198730469, 51.003074645996094, 31.050617218017578, 12.99053955078125, 42.659507751464844, 11.865676879882812, 28.002784729003906, 44.119110107421875, 4.868612289428711, 47.72694396972656, 2.40997314453125, 12.294120788574219, 74.57568359375, 29.240341186523438, 121.81719970703125, 42.77203369140625, 24.768966674804688, 37.84630584716797, 31.61957550048828, 93.427978515625, 48.70911407470703, 16.445158004760742, 39.10774230957031, 19.47323226928711, 25.364036560058594, 101.38639831542969, 23.098407745361328, 69.02558898925781, 56.85975646972656, 23.81298065185547, 29.528778076171875, 22.150257110595703, 29.559906005859375, 111.71920776367188, 6.535896301269531, 43.66508865356445, 49.969879150390625, 17.612060546875, 2.2162094116210938, 6.1179351806640625, 40.53919982910156, 6.509572982788086, 7.3531951904296875, 7.6077880859375, 24.695556640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000620.npy"} +{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 36.26169967651367, "std": 26.917091369628906, "min": -21.76903533935547, "p10": 7.441926002502442, "median": 34.25612258911133, "p90": 61.58816223144532, "max": 142.17507934570312, "pos_frac": 0.9375, "sample": [86.70170593261719, 3.6133651733398438, -21.76903533935547, 47.630462646484375, 35.52952575683594, 26.127159118652344, 52.894752502441406, 40.00325012207031, -5.071048736572266, 89.95250701904297, 21.610641479492188, 25.463275909423828, 24.62964630126953, 63.313690185546875, 31.86626434326172, 21.709184646606445, -1.9336795806884766, 20.579946517944336, 46.76752471923828, 44.929168701171875, 142.17507934570312, 40.69120788574219, 61.695068359375, 26.437705993652344, 84.3603744506836, 54.10961151123047, 61.11151885986328, 21.026107788085938, 51.052154541015625, 39.92314529418945, 10.910537719726562, 36.346099853515625, 56.658172607421875, 7.068267822265625, 32.3255615234375, 57.512794494628906, 33.054256439208984, 30.44512939453125, 15.203468322753906, 21.2783203125, -6.232410430908203, 16.10131072998047, 15.314712524414062, 43.86509704589844, 39.365440368652344, 49.00177764892578, 29.62384033203125, 40.71138000488281, 20.54114532470703, 40.1878547668457, 5.9605865478515625, 8.31379508972168, 48.72071838378906, 61.338714599609375, 39.01081848144531, 15.980178833007812, 42.68040466308594, 30.000762939453125, 18.40258026123047, 30.256927490234375, 107.73095703125, 37.01824951171875, 13.463027954101562, 35.45798873901367], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000621.npy"} +{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 36.819705963134766, "std": 28.990467071533203, "min": -15.751197814941406, "p10": 3.4215026855468755, "median": 34.931846618652344, "p90": 77.7275375366211, "max": 113.98033142089844, "pos_frac": 0.953125, "sample": [66.98393249511719, 47.485877990722656, 31.031707763671875, 19.67298126220703, 47.66215515136719, 82.24354553222656, 7.440219879150391, 34.93947219848633, 93.336669921875, 53.15420150756836, 39.24702453613281, 52.12281036376953, 39.17639923095703, 12.557235717773438, 7.825366973876953, 77.48094177246094, 14.005943298339844, 36.01873016357422, 57.613677978515625, 6.7113800048828125, 25.13949203491211, 27.34575653076172, 12.662567138671875, 94.69647216796875, 14.284767150878906, 3.175994873046875, 57.606910705566406, 30.2847900390625, 57.77501678466797, 8.184814453125, 42.24540710449219, 28.453338623046875, 113.98033142089844, 1.651449203491211, 14.487268447875977, 71.6590576171875, -14.9085693359375, 40.599815368652344, 76.02951049804688, 34.92422103881836, 62.419281005859375, 37.31682586669922, 3.994354248046875, 4.041481018066406, 38.97657012939453, 0.41103363037109375, 20.262069702148438, -15.751197814941406, 7.5686492919921875, 2.4375648498535156, 77.83322143554688, 56.54090881347656, 84.10209655761719, 66.32803344726562, 44.38301086425781, 30.392536163330078, 24.965797424316406, -4.822196960449219, 10.802669525146484, 78.2374267578125, 66.59464263916016, 27.191722869873047, 22.541934967041016, 42.704063415527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000622.npy"} +{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 30.37273597717285, "std": 29.304155349731445, "min": -18.77422332763672, "p10": -3.51177864074707, "median": 25.30834197998047, "p90": 71.72511444091798, "max": 130.27420043945312, "pos_frac": 0.859375, "sample": [25.28424072265625, 14.427078247070312, -3.691162109375, 57.16602325439453, 36.21363830566406, -3.361705780029297, 36.60448455810547, 82.53579711914062, 62.90675735473633, 24.29303741455078, 72.98480224609375, 16.947498321533203, 40.24976348876953, -3.5760955810546875, 48.17547607421875, 13.311424255371094, 72.3441162109375, 11.36566162109375, 11.59649658203125, 14.986846923828125, 10.40700912475586, 23.845401763916016, 32.63127136230469, 35.81629180908203, -16.092605590820312, 10.23480224609375, 70.28077697753906, 38.225032806396484, 14.076923370361328, 33.137046813964844, 37.0364990234375, -0.6039810180664062, -4.253700256347656, 13.671045303344727, -10.186285018920898, 4.28679084777832, 78.95442962646484, 27.36041259765625, 4.842866897583008, 6.190986633300781, 28.75933837890625, 42.910560607910156, 58.50844955444336, 66.89494323730469, 44.455780029296875, 6.386940002441406, 59.83589172363281, 55.261741638183594, 14.992931365966797, -18.77422332763672, 130.27420043945312, 90.93374633789062, 14.725845336914062, 4.913930892944336, 9.977806091308594, 56.289642333984375, -13.6041259765625, 25.332443237304688, 43.65578842163086, 36.38656234741211, 76.39826965332031, 41.89338684082031, 13.703544616699219, 13.116508483886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000623.npy"} +{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 37.318939208984375, "std": 27.726625442504883, "min": -28.102996826171875, "p10": -2.399972534179687, "median": 40.670066833496094, "p90": 69.39462738037109, "max": 88.00047302246094, "pos_frac": 0.859375, "sample": [33.9625129699707, -12.729473114013672, 50.707000732421875, 49.39954376220703, 38.326507568359375, 72.31198120117188, 44.09589767456055, 57.36021423339844, 68.94091796875, 43.67516326904297, 40.14276885986328, 9.11285400390625, 40.23400115966797, 69.44570922851562, 53.48793029785156, 37.65605926513672, 67.3080825805664, 57.53053283691406, 86.1893310546875, 6.116874694824219, 32.75849914550781, 69.27543640136719, 40.565650939941406, 7.654733657836914, 40.77448272705078, 36.670860290527344, 76.74584197998047, 68.75069427490234, -8.509674072265625, 19.89960479736328, 43.22125244140625, 58.105926513671875, -8.332489013671875, 23.886905670166016, 26.95123291015625, 41.02735900878906, 53.66560745239258, 4.796464920043945, 84.10063171386719, 4.8179779052734375, 27.04460906982422, 36.310997009277344, 41.70359802246094, -28.102996826171875, 63.87467956542969, 42.68452453613281, 49.62202453613281, 35.64946746826172, 60.6923828125, 52.244834899902344, 31.867774963378906, -2.2061309814453125, -0.8276348114013672, 88.00047302246094, -15.431543350219727, 6.75567626953125, -2.4830474853515625, 6.536960601806641, -8.548412322998047, 45.17125701904297, 34.74646759033203, 54.3485107421875, 77.03594970703125, 61.620361328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000624.npy"} +{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 35.52130889892578, "std": 28.53171730041504, "min": -9.095909118652344, "p10": 3.0694746017456063, "median": 27.32398223876953, "p90": 71.45126724243164, "max": 110.394775390625, "pos_frac": 0.9375, "sample": [7.8975067138671875, 24.276288986206055, 52.564918518066406, 40.488548278808594, 27.442401885986328, 46.983062744140625, 58.61590576171875, 99.01031494140625, 27.032455444335938, 66.7718734741211, 22.2657470703125, -5.464752197265625, 20.03612518310547, 12.715896606445312, 71.37203979492188, 46.97895812988281, 1.832498550415039, 11.323246002197266, 110.394775390625, 37.79075622558594, 69.38873291015625, 22.124242782592773, 13.642772674560547, 2.649209976196289, 17.29582977294922, 38.843666076660156, 21.781814575195312, 21.24767303466797, 58.43788146972656, 15.976810455322266, 58.39543151855469, 32.32880401611328, 91.36449432373047, 40.60969161987305, 63.166709899902344, 43.11595153808594, 10.715164184570312, 54.49905776977539, 24.270709991455078, 6.202550888061523, -9.095909118652344, 23.494537353515625, 93.7112045288086, 19.0379638671875, 50.9141845703125, 56.874229431152344, 10.746660232543945, 49.296051025390625, 26.826675415039062, 31.253793716430664, 2.6960315704345703, -6.947479248046875, 31.276466369628906, 22.292640686035156, 27.205562591552734, 43.31919860839844, 3.9408416748046875, 32.61681365966797, 86.27721405029297, -8.569686889648438, 71.48522186279297, 6.340675354003906, 100.8428955078125, 21.142173767089844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000625.npy"} +{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 30.092287063598633, "std": 28.965452194213867, "min": -17.175506591796875, "p10": -2.8985300064086914, "median": 21.56238555908203, "p90": 71.35664901733398, "max": 102.23062133789062, "pos_frac": 0.859375, "sample": [25.749237060546875, 34.15519714355469, 11.355796813964844, 6.193412780761719, 13.48779296875, 41.255027770996094, 76.5006332397461, 11.507270812988281, 17.491668701171875, 59.988739013671875, 46.865478515625, 32.41884231567383, 71.32096862792969, 84.89370727539062, -2.6972503662109375, -5.906288146972656, 29.848487854003906, 10.54460334777832, 12.45233154296875, 10.407207489013672, 11.942626953125, 1.8229904174804688, 62.21336364746094, 11.507478713989258, -9.207490921020508, 23.326187133789062, 34.643035888671875, 28.343788146972656, 13.954875946044922, 71.37194061279297, 12.994577407836914, 61.985328674316406, 1.5012626647949219, 10.668720245361328, 56.15045928955078, 48.327606201171875, 53.21380615234375, 42.264488220214844, 34.210960388183594, 19.39423370361328, 87.47482299804688, 19.798583984375, 102.23062133789062, 101.01182556152344, 55.3834228515625, 25.32062530517578, -7.1520843505859375, 16.841400146484375, 23.74664306640625, 15.535888671875, -3.605499267578125, 56.97547912597656, 89.66121673583984, 53.61285400390625, 13.491714477539062, -0.3264007568359375, 14.142333984375, 59.87547302246094, -17.175506591796875, -5.2804718017578125, 5.5285491943359375, 11.823966979980469, 31.512466430664062, -2.984792709350586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000626.npy"} +{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 33.95563507080078, "std": 34.09070587158203, "min": -31.861778259277344, "p10": -3.2723566055297852, "median": 27.479564666748047, "p90": 85.23593215942388, "max": 131.99078369140625, "pos_frac": 0.859375, "sample": [31.88971710205078, 54.09320068359375, 90.90275573730469, 55.48847961425781, 15.537643432617188, 4.841400146484375, 58.30286407470703, 37.5653076171875, 91.68428039550781, 28.451171875, 58.53666687011719, -7.760223388671875, 14.393753051757812, 11.766347885131836, -3.3083019256591797, 53.69029235839844, 8.950122833251953, 31.553863525390625, 48.13908386230469, 51.83453369140625, -16.08148193359375, 42.26862716674805, 68.28340148925781, 1.8164081573486328, 116.95584106445312, -9.87615966796875, 58.30473327636719, 92.69882202148438, 7.81243896484375, 4.513959884643555, 15.066612243652344, 8.132488250732422, 41.622955322265625, 131.99078369140625, 72.01334381103516, 6.772006988525391, 60.98954772949219, 19.955612182617188, 55.62751770019531, -6.315547943115234, 3.5513858795166016, 22.460302352905273, 105.90843200683594, -10.84722900390625, 17.664230346679688, 2.1293411254882812, 52.10856628417969, 25.35301971435547, -3.1884841918945312, 20.033493041992188, 65.90528869628906, 15.289459228515625, 30.588077545166016, -31.861778259277344, 13.131542205810547, 46.38909912109375, 25.782318115234375, 26.507957458496094, 10.656944274902344, 92.86451721191406, 53.88975524902344, -2.534271240234375, 48.42073059082031, 33.85295104980469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000627.npy"} +{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 35.616455078125, "std": 26.15797996520996, "min": -8.909416198730469, "p10": 4.7138875961303714, "median": 31.757064819335938, "p90": 71.54060668945313, "max": 103.80914306640625, "pos_frac": 0.96875, "sample": [30.941862106323242, -0.5642166137695312, 40.24609375, 52.354248046875, 37.046875, 54.397735595703125, 16.326339721679688, 30.64502716064453, 12.636642456054688, 15.497865676879883, 17.00086212158203, 21.05644989013672, -8.909416198730469, 71.88336181640625, 31.310211181640625, 49.66505432128906, 9.360057830810547, 3.4176101684570312, 103.80914306640625, 52.228729248046875, 50.67218017578125, 40.17393493652344, 29.187423706054688, 48.524986267089844, 15.152107238769531, 1.6358489990234375, 37.67280578613281, 34.90984344482422, 16.58924102783203, 26.522083282470703, 70.7408447265625, 16.55638885498047, 33.48875427246094, 82.19281005859375, 45.975341796875, 69.01264953613281, 25.885709762573242, 32.20391845703125, 20.07436180114746, 10.402965545654297, 40.19450378417969, 3.9111671447753906, 4.451395034790039, 42.89335632324219, 83.3326187133789, 22.870010375976562, 65.28490447998047, 11.822341918945312, 10.652366638183594, 15.0218505859375, 51.965606689453125, 24.12053108215332, 97.2214126586914, 95.26270294189453, 50.890106201171875, 9.747259140014648, 58.725006103515625, 5.3263702392578125, 72.6528091430664, 35.047245025634766, 65.62857055664062, 13.848800659179688, 50.292205810546875, 0.3652381896972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000628.npy"} +{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 31.81080436706543, "std": 25.537303924560547, "min": -28.626632690429688, "p10": 3.609826850891113, "median": 30.256628036499023, "p90": 59.40623092651368, "max": 136.9967041015625, "pos_frac": 0.96875, "sample": [57.06707763671875, 50.75973892211914, 5.26264762878418, 25.259824752807617, 72.65126037597656, 18.981060028076172, 19.759849548339844, 30.959213256835938, 25.08667755126953, 57.12663269042969, 24.396316528320312, 17.40904998779297, 53.32147979736328, 4.756374359130859, 36.805213928222656, 17.889915466308594, 43.701148986816406, 39.36275863647461, 18.0678653717041, 28.90430450439453, 3.6064376831054688, 75.30271911621094, 47.61888122558594, 0.013885498046875, 21.550525665283203, 46.514060974121094, 54.337791442871094, 3.693115234375, 49.673301696777344, 42.203125, 13.271678924560547, 0.34024810791015625, 136.9967041015625, 36.18597412109375, 33.878814697265625, 7.029073715209961, 9.252220153808594, 50.37799072265625, 54.34336853027344, 2.4570159912109375, 20.24401092529297, 22.353988647460938, 31.25470733642578, 38.43775939941406, 64.53350830078125, 27.31085968017578, 45.021026611328125, -4.165351867675781, 33.19647216796875, 3.617734909057617, 30.821582794189453, 29.691673278808594, 0.8497314453125, 60.61563491821289, 5.1747589111328125, 79.27455139160156, 39.16748809814453, 60.383201599121094, 27.94927978515625, 41.19959259033203, 45.047454833984375, 21.18646240234375, 5.1766815185546875, -28.626632690429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000629.npy"} +{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 30.402843475341797, "std": 25.56612205505371, "min": -21.038734436035156, "p10": 0.259550666809083, "median": 27.91111946105957, "p90": 70.356298828125, "max": 84.62535095214844, "pos_frac": 0.890625, "sample": [83.61354064941406, 3.094268798828125, 11.373544692993164, 20.712074279785156, -4.268272399902344, 71.33223724365234, 49.681365966796875, 76.07752990722656, 75.57696533203125, 37.65914535522461, 39.82575988769531, 28.2843017578125, 31.024169921875, 68.07910919189453, 68.06426239013672, 11.473831176757812, 7.315153121948242, -0.23346710205078125, 24.701072692871094, 44.23520278930664, -2.3570556640625, 14.984411239624023, 9.332351684570312, 59.90690994262695, -3.4862289428710938, 84.62535095214844, 25.068086624145508, 44.31446838378906, 13.880149841308594, 15.511016845703125, 1.212697982788086, 53.367820739746094, 79.303955078125, -9.22528076171875, 47.5634880065918, 31.259191513061523, 31.903194427490234, 48.81608200073242, 7.6261749267578125, 52.898895263671875, 14.975051879882812, 9.56167984008789, 9.367294311523438, 11.456245422363281, 41.21612548828125, 18.555389404296875, 37.5157470703125, 50.387306213378906, 27.53793716430664, 18.08233642578125, 34.503814697265625, 41.579017639160156, 15.216880798339844, 35.10906982421875, 3.7233543395996094, 34.2213134765625, 17.6268310546875, 25.17620849609375, 30.944469451904297, -21.038734436035156, 79.79464721679688, -0.1489410400390625, 53.341915130615234, 2.949522018432617], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000630.npy"} +{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 28.286867141723633, "std": 29.5904598236084, "min": -48.538818359375, "p10": -7.662013244628903, "median": 25.141874313354492, "p90": 75.69249572753907, "max": 102.47731018066406, "pos_frac": 0.859375, "sample": [79.5079345703125, 23.60882568359375, 26.455856323242188, 49.350460052490234, 33.92637634277344, 48.02635192871094, 50.397605895996094, 32.599525451660156, -11.085018157958984, 0.28619384765625, -14.083221435546875, 97.03376770019531, 10.4312744140625, 57.272743225097656, 25.079639434814453, -4.15826416015625, 25.20410919189453, 75.58438110351562, 30.872085571289062, 7.821126937866211, 10.344659805297852, 8.99131965637207, 44.29277801513672, -1.7889633178710938, 53.74300765991211, 40.6048469543457, 38.63631820678711, 19.339141845703125, 25.556900024414062, 5.220001220703125, -9.465194702148438, 40.735435485839844, 23.40447235107422, 27.135780334472656, 77.73748779296875, -18.04876708984375, 14.432518005371094, 8.758499145507812, 19.886411666870117, 60.11578369140625, 24.84186553955078, 9.027589797973633, 75.73883056640625, 77.31715393066406, 18.801128387451172, 4.953561782836914, 82.00590515136719, 7.6195831298828125, 40.383575439453125, 34.89599609375, 30.208816528320312, 6.624452590942383, 18.594844818115234, 18.31293487548828, -48.538818359375, -9.163619995117188, 52.467071533203125, 36.08349609375, 102.47731018066406, -27.303199768066406, 43.49285888671875, 23.79534912109375, 13.026741027832031, 40.931922912597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000631.npy"} +{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 32.566551208496094, "std": 30.02157211303711, "min": -61.45722961425781, "p10": -5.9320505142211895, "median": 33.68462562561035, "p90": 63.4959457397461, "max": 107.74049377441406, "pos_frac": 0.84375, "sample": [5.770717620849609, -61.45722961425781, 44.09281921386719, 36.43202209472656, 59.81922149658203, -4.181800842285156, 55.60746765136719, 87.05409240722656, 71.00178527832031, 40.31483459472656, 67.39094543457031, 12.7945556640625, 55.54810333251953, 12.768402099609375, 39.16101837158203, 24.183734893798828, 59.428558349609375, 63.690582275390625, -1.1010704040527344, 18.99353790283203, 45.97681427001953, 33.128814697265625, 60.296714782714844, 55.92610168457031, -7.326103210449219, 33.33766174316406, 60.271514892578125, -20.009803771972656, 43.235328674316406, 27.31484031677246, 14.380035400390625, -24.216115951538086, 96.24607849121094, 25.594802856445312, -3.7843170166015625, 22.56186294555664, 48.68622589111328, 33.5135383605957, 36.423561096191406, 19.84521484375, 52.25818634033203, 11.075515747070312, -10.045654296875, 33.855712890625, 20.725067138671875, 47.64691162109375, 38.12037658691406, 54.29296112060547, 107.74049377441406, 78.76044464111328, 63.04179382324219, 41.2816162109375, -21.77935028076172, 18.257354736328125, 13.28265380859375, 47.074554443359375, -6.682157516479492, 42.24005889892578, 16.668148040771484, 21.57990264892578, 53.829010009765625, 29.016416549682617, 13.143564224243164, 30.160614013671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000632.npy"} +{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 27.419578552246094, "std": 25.544029235839844, "min": -21.05096435546875, "p10": -2.2383945465087884, "median": 20.775885581970215, "p90": 60.08617172241212, "max": 86.0582275390625, "pos_frac": 0.84375, "sample": [14.827354431152344, 53.16407012939453, 14.597850799560547, 17.55841827392578, 49.41668701171875, 42.1947021484375, -1.0664520263671875, 1.3684310913085938, 71.34563446044922, 36.92454528808594, 21.294818878173828, 56.60929870605469, 53.27015686035156, -2.4582595825195312, 15.310897827148438, 1.1843318939208984, 39.11278533935547, 43.031166076660156, 17.654518127441406, 25.56622314453125, 14.276641845703125, 86.0582275390625, 52.40254211425781, -3.0625762939453125, 74.114501953125, 29.047622680664062, 20.2569522857666, 28.14850425720215, 1.62060546875, -4.118230819702148, 73.40013122558594, 32.464744567871094, 45.00126647949219, 13.391555786132812, -7.000951766967773, -1.7253761291503906, -0.9183502197265625, 8.318161010742188, -21.05096435546875, 30.014432907104492, 60.852081298828125, 47.12811279296875, 10.087974548339844, 3.6693572998046875, 57.070465087890625, 36.2669677734375, 39.416160583496094, 84.60083770751953, 58.299049377441406, 10.319282531738281, -9.2911376953125, 18.744064331054688, 8.902368545532227, 13.712089538574219, 12.400249481201172, 16.54925537109375, 47.89437484741211, 84.218505859375, -2.780355453491211, 23.83544921875, 15.768287658691406, 8.264541625976562, 33.618568420410156, 33.75981140136719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000633.npy"} +{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 27.910812377929688, "std": 29.121278762817383, "min": -25.79925537109375, "p10": -5.474050903320312, "median": 23.10918426513672, "p90": 72.55616302490235, "max": 109.80221557617188, "pos_frac": 0.828125, "sample": [6.716697692871094, 34.63453674316406, 28.624040603637695, 86.64595031738281, 18.475122451782227, 6.974485397338867, 2.0223159790039062, 9.731193542480469, 25.5721435546875, 82.15158081054688, -22.161516189575195, 24.550201416015625, 6.939416885375977, 20.772979736328125, 81.9400634765625, 72.82679748535156, -9.903736114501953, 11.88140869140625, 39.786102294921875, 13.604766845703125, 21.668167114257812, -25.79925537109375, -1.0339508056640625, 24.665451049804688, -4.928169250488281, 12.05801773071289, -6.6683502197265625, 57.61133575439453, 48.914337158203125, 14.747447967529297, 71.9246826171875, 12.146903991699219, 25.68962860107422, -10.232551574707031, 75.59005737304688, 57.53461837768555, 109.80221557617188, 19.982250213623047, 43.44062423706055, -0.23295211791992188, 45.542842864990234, 2.7025909423828125, -1.3144607543945312, 11.108341217041016, 26.59693145751953, 6.47608757019043, 47.020355224609375, 16.92730712890625, 35.03330993652344, 48.36576843261719, 16.757688522338867, 59.97981262207031, 61.24298858642578, 54.199623107910156, 59.300018310546875, 26.235252380371094, 28.521156311035156, 83.49874877929688, -8.561813354492188, 28.121612548828125, -5.708000183105469, 12.981094360351562, 11.803003311157227, 30.796630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000634.npy"} +{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 28.63296127319336, "std": 22.558204650878906, "min": -14.66988754272461, "p10": 2.919272422790528, "median": 26.074861526489258, "p90": 61.150843811035166, "max": 96.20649719238281, "pos_frac": 0.921875, "sample": [11.648822784423828, 44.283912658691406, 62.09429931640625, 7.761322021484375, 55.042503356933594, 26.028507232666016, 13.255393981933594, 12.004024505615234, 22.773094177246094, 14.882745742797852, 33.35248565673828, 53.12498474121094, 30.645095825195312, -4.7563323974609375, 3.26904296875, 45.71333312988281, 14.836112976074219, 32.75372314453125, 69.59429931640625, 26.1212158203125, 33.09842300415039, 15.99981689453125, 29.95947265625, 64.77568054199219, 45.050907135009766, 23.278610229492188, 1.8299713134765625, 26.41895294189453, 33.45408630371094, 79.52925109863281, 5.36851692199707, 53.05641174316406, -14.66988754272461, 66.96409606933594, 22.5723876953125, 23.50212860107422, 39.15283966064453, 11.82962417602539, 3.2415695190429688, 38.97108459472656, 16.98453140258789, 67.58306121826172, 96.20649719238281, 35.88233947753906, 23.889907836914062, 15.003646850585938, 58.94944763183594, 7.376152038574219, -6.453514099121094, 16.59471893310547, 36.02496337890625, -1.4905242919921875, 48.81036376953125, 37.78521728515625, 26.432804107666016, 2.7811450958251953, 18.861446380615234, 18.659683227539062, 31.332611083984375, 43.913726806640625, 7.151294708251953, -0.76300048828125, 42.958343505859375, 10.222122192382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000635.npy"} +{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 33.00461196899414, "std": 24.0425968170166, "min": -11.745904922485352, "p10": -0.9414062499999987, "median": 31.804126739501953, "p90": 59.25036544799805, "max": 95.77597045898438, "pos_frac": 0.890625, "sample": [20.254152297973633, 23.6064453125, 25.477386474609375, 34.81450653076172, 23.558143615722656, 11.068256378173828, 76.44383239746094, -10.374595642089844, 53.362693786621094, -1.89617919921875, -6.101108551025391, 38.137718200683594, 31.310020446777344, 46.429725646972656, 26.696929931640625, 27.46356964111328, 22.675308227539062, 59.46404266357422, 49.21820068359375, 27.87531280517578, 56.2445068359375, 20.77753448486328, 16.576560974121094, 2.991607666015625, 52.825523376464844, 28.963165283203125, 48.121551513671875, 48.62912368774414, 34.305084228515625, 30.239028930664062, -5.907356262207031, 7.2775115966796875, 38.00883102416992, 32.29823303222656, 0.8671760559082031, 88.46080017089844, 51.0540771484375, 58.75178527832031, 47.74787902832031, -9.986419677734375, 42.8304443359375, 22.284879684448242, 49.046913146972656, 75.45477294921875, 22.9764404296875, 57.13844299316406, 52.04986572265625, 76.00164794921875, 5.781734466552734, -11.745904922485352, 45.53832244873047, -1.4910888671875, 42.053680419921875, 37.503318786621094, 42.78049850463867, 27.717357635498047, 59.53346252441406, 95.77597045898438, 33.903839111328125, 37.54780578613281, 0.3411865234375, 11.570919036865234, 30.162349700927734, 29.80768585205078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000636.npy"} +{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 32.936134338378906, "std": 25.70160675048828, "min": -12.292789459228516, "p10": 2.2160900115966813, "median": 26.831416130065918, "p90": 74.32971496582033, "max": 111.09378814697266, "pos_frac": 0.921875, "sample": [34.96148681640625, 19.51768684387207, -6.9582366943359375, 53.296546936035156, 34.01010513305664, 46.57585906982422, 51.03683090209961, -7.476285934448242, 79.42141723632812, 56.05097961425781, 44.12745666503906, 39.65337371826172, 16.249710083007812, 18.67737579345703, 43.150909423828125, 89.19114685058594, 35.5513916015625, 17.456100463867188, 64.68212890625, 41.137542724609375, 87.29385375976562, 23.639190673828125, 28.391098022460938, 75.54766845703125, 21.905075073242188, 36.24287414550781, 15.579025268554688, 35.029693603515625, 76.51876831054688, 8.62811279296875, 28.08526611328125, -12.292789459228516, 17.17559814453125, 0.7629547119140625, 18.80036163330078, -3.498350143432617, 19.643508911132812, 21.54937744140625, 43.76093673706055, 26.485502243041992, 71.48782348632812, 111.09378814697266, 10.077293395996094, 42.431640625, 26.07587432861328, 31.801345825195312, 47.87517547607422, 6.396598815917969, 51.638145446777344, 27.177330017089844, 44.81890869140625, -3.189065933227539, 23.329254150390625, 40.27099609375, 11.771480560302734, 10.142505645751953, 25.41611671447754, 83.79707336425781, 26.24708366394043, 24.632644653320312, 1.546640396118164, 25.271774291992188, 24.46282958984375, 3.778139114379883], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000637.npy"} +{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 37.08156967163086, "std": 28.503551483154297, "min": -15.512519836425781, "p10": 6.084112548828126, "median": 32.81105422973633, "p90": 80.96904754638673, "max": 109.7422103881836, "pos_frac": 0.96875, "sample": [95.30349731445312, 2.555999755859375, 60.992523193359375, 28.81000518798828, 47.272987365722656, 14.290504455566406, 19.61411476135254, 43.39173889160156, 33.912906646728516, 4.387811660766602, 43.17823028564453, 89.06094360351562, 15.985710144042969, 68.92433166503906, 49.849945068359375, 12.246761322021484, 5.666114807128906, 44.05127716064453, 45.00858688354492, 33.06557846069336, -15.512519836425781, 22.918914794921875, 34.27448272705078, 43.41777038574219, 32.5565299987793, 109.7422103881836, 5.433032989501953, 30.795978546142578, 96.89071655273438, 39.414405822753906, 48.87497329711914, 82.41258239746094, 29.088546752929688, 46.38945007324219, 71.28758239746094, 19.181304931640625, 33.10054016113281, 8.50374984741211, 14.995635986328125, 19.18112564086914, 12.046663284301758, 26.92249298095703, 75.65193939208984, 64.33731079101562, -12.461448669433594, 4.275897979736328, 77.60079956054688, 36.314430236816406, 11.822471618652344, 10.678787231445312, 22.443544387817383, 7.059440612792969, 56.99896240234375, 27.305519104003906, 45.1925048828125, 7.5322265625, 11.304222106933594, 50.669471740722656, 87.24671936035156, 100.78077697753906, 15.441848754882812, 30.407554626464844, 48.166465759277344, 24.965248107910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000638.npy"} +{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 31.09198760986328, "std": 24.72230339050293, "min": -26.329757690429688, "p10": 3.054883193969727, "median": 25.991379737854004, "p90": 68.35945205688476, "max": 83.62120056152344, "pos_frac": 0.953125, "sample": [1.6311149597167969, 31.87770652770996, 43.951385498046875, 41.64418029785156, 50.706390380859375, 2.7230377197265625, 65.97892761230469, 51.592437744140625, 36.141502380371094, 6.771148681640625, 23.083633422851562, 3.8291893005371094, 31.437828063964844, -26.329757690429688, 11.716644287109375, 23.27667236328125, 29.845169067382812, 11.64788818359375, 53.8524169921875, 10.010047912597656, 69.12566375732422, 17.98564338684082, 68.42169189453125, 49.03114318847656, 68.21422576904297, 18.662029266357422, 23.942935943603516, 26.6175594329834, 38.44940948486328, 57.00837707519531, 58.66735076904297, 8.841911315917969, 25.433303833007812, 1.9938507080078125, 33.15629196166992, 17.55660629272461, 26.248315811157227, 71.31917572021484, 63.74443054199219, 25.39287567138672, 16.535568237304688, 25.613311767578125, 50.12902069091797, 35.439125061035156, 83.62120056152344, 11.119651794433594, 24.29828643798828, 25.73444366455078, 0.6403350830078125, 22.053085327148438, 11.012956619262695, 5.170928955078125, 27.144432067871094, 15.20223617553711, -23.55634307861328, -0.7341461181640625, 36.576629638671875, 11.097213745117188, 30.745895385742188, 76.19110107421875, 22.16364860534668, 78.57838439941406, 82.54571533203125, 47.36408996582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000639.npy"} +{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 36.84351348876953, "std": 30.175405502319336, "min": -1.3929557800292969, "p10": 6.761939239501953, "median": 34.40303039550781, "p90": 72.008895111084, "max": 153.1376953125, "pos_frac": 0.9375, "sample": [47.95612335205078, 81.90977478027344, 2.629148483276367, 57.66035842895508, 15.810762405395508, 14.0784912109375, 65.50475311279297, 61.3502311706543, 12.133947372436523, 7.259910583496094, 39.87748718261719, 37.37828826904297, 9.875328063964844, 34.71880340576172, 7.614269256591797, 35.52915954589844, 61.23756408691406, 8.58563232421875, 11.113700866699219, -1.1587715148925781, 20.111923217773438, 45.08818817138672, 22.837181091308594, -0.07767868041992188, 19.98556137084961, -1.3929557800292969, 25.96862030029297, 46.46076965332031, 124.55047607421875, 75.11186218261719, 74.46682739257812, 61.14158630371094, 26.984317779541016, 48.08384704589844, 153.1376953125, 0.016366958618164062, -0.4691925048828125, 15.742271423339844, 10.720123291015625, 26.23968505859375, 22.57600212097168, 34.661712646484375, 43.11407470703125, 55.71232604980469, 23.023590087890625, 34.251556396484375, 34.55450439453125, 59.55604553222656, 12.451377868652344, 38.182899475097656, 87.92745971679688, 61.19221496582031, 8.136215209960938, 52.970001220703125, 7.347198486328125, 6.54852294921875, 49.81562805175781, 43.09800338745117, 25.960281372070312, 66.27371978759766, 37.46522521972656, 94.87759399414062, 28.4735107421875, 24.042686462402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000640.npy"} +{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 32.17498779296875, "std": 30.656888961791992, "min": -10.617446899414062, "p10": 7.408464431762695, "median": 25.50763702392578, "p90": 62.327046203613286, "max": 195.11923217773438, "pos_frac": 0.9375, "sample": [7.728004455566406, 8.115131378173828, 66.7528305053711, 76.02902221679688, 13.274429321289062, 41.87397766113281, 33.230194091796875, 38.368072509765625, 12.787300109863281, 7.626319885253906, 7.315097808837891, 36.32427215576172, -0.1465606689453125, 13.0794677734375, 195.11923217773438, 48.8530387878418, 12.372642517089844, 11.150169372558594, 23.382293701171875, 38.40348815917969, 59.27117919921875, 12.130645751953125, 12.04965591430664, 60.55516052246094, 20.617908477783203, 7.808563232421875, -10.617446899414062, 19.10645294189453, 22.251262664794922, 26.938949584960938, 16.075180053710938, 35.55699157714844, 10.510255813598633, -7.2738037109375, 52.90709686279297, 33.40968704223633, 89.30308532714844, 12.248733520507812, 9.988006591796875, 32.141990661621094, 27.502653121948242, -4.277868270874023, 18.099546432495117, 9.50494384765625, 21.603958129882812, 10.714637756347656, 11.761341094970703, 6.174446105957031, 50.65684509277344, 79.44496154785156, 75.40840911865234, 59.11225128173828, 54.668426513671875, 44.11943054199219, 45.47785186767578, 47.959754943847656, 44.11546325683594, 63.08642578125, 32.60981750488281, 24.076324462890625, 4.538307189941406, 38.87933349609375, 32.06773376464844, 55.27619934082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000641.npy"} +{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 35.434226989746094, "std": 30.842571258544922, "min": -24.064788818359375, "p10": 2.500834655761721, "median": 29.775531768798828, "p90": 76.91655731201172, "max": 122.2484130859375, "pos_frac": 0.90625, "sample": [45.0142936706543, 55.439849853515625, 38.53041076660156, 34.44300079345703, 23.31182098388672, 17.124526977539062, 92.46223449707031, -2.6075439453125, 5.145393371582031, 31.350997924804688, 5.291542053222656, 4.896820068359375, 14.594314575195312, 13.845329284667969, 28.20006561279297, 74.886962890625, 26.745269775390625, 77.00794982910156, -23.665771484375, 23.762554168701172, 8.422233581542969, 13.7821044921875, 49.62109375, 19.76105308532715, 38.62192916870117, 9.775650024414062, 59.603721618652344, 48.85674285888672, 26.558692932128906, 52.586761474609375, 39.402984619140625, 32.737098693847656, 94.77157592773438, 60.03837203979492, 1.4739837646484375, 92.8994140625, 57.48805236816406, 76.70330810546875, -24.064788818359375, 19.218524932861328, 70.54090881347656, 47.70570373535156, 20.171485900878906, 69.29179382324219, -11.57769775390625, 78.12492370605469, 49.868194580078125, 45.325340270996094, 14.49884033203125, 32.79826354980469, 122.2484130859375, 27.105438232421875, -0.635955810546875, 6.0097503662109375, 15.834104537963867, -6.671062469482422, 19.052566528320312, 98.31739807128906, 9.45477294921875, 52.6630859375, 23.4827880859375, 60.51044464111328, 8.307731628417969, 51.32490539550781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000642.npy"} +{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 30.795534133911133, "std": 24.469932556152344, "min": -27.91172218322754, "p10": 4.888920593261719, "median": 29.54468536376953, "p90": 58.87992477416992, "max": 107.73214721679688, "pos_frac": 0.9375, "sample": [47.1573486328125, 49.45135498046875, 35.734153747558594, 51.39739990234375, 44.03271484375, 76.77938079833984, 29.077049255371094, 8.586891174316406, -27.91172218322754, 21.21733856201172, 4.848987579345703, 31.56866455078125, 15.80908203125, 28.140789031982422, -2.606037139892578, 67.64291381835938, 58.3485107421875, 43.424346923828125, 15.357208251953125, 59.10767364501953, 5.986656188964844, 17.579675674438477, 12.272857666015625, 9.205793380737305, 14.568645477294922, 18.138591766357422, 5.831211090087891, 35.10680389404297, 28.077415466308594, 24.37378692626953, -11.355354309082031, 22.645475387573242, -8.787490844726562, 68.67677307128906, 107.73214721679688, 80.11376953125, 20.652603149414062, 37.84357452392578, 20.59107780456543, 36.24298095703125, 20.662517547607422, 56.540679931640625, 81.19393920898438, 56.46917724609375, 18.415176391601562, 33.78301239013672, 3.7591819763183594, 39.86940002441406, 30.12036895751953, 14.658193588256836, 42.88218688964844, 50.82433319091797, 4.982097625732422, 6.3308868408203125, 30.01232147216797, 17.509916305541992, 35.522560119628906, 33.378623962402344, 44.60502624511719, 57.18550109863281, 43.76766586303711, 30.922164916992188, 3.3776016235351562, 11.480583190917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000643.npy"} +{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 30.231281280517578, "std": 28.853412628173828, "min": -25.398818969726562, "p10": -0.28005638122558574, "median": 23.710854530334473, "p90": 73.54161376953127, "max": 92.43980407714844, "pos_frac": 0.875, "sample": [65.24185180664062, 16.606477737426758, 92.43980407714844, -25.398818969726562, -13.304763793945312, 32.81035614013672, -15.126541137695312, 22.140518188476562, 19.814491271972656, 17.89097023010254, 12.899551391601562, 59.836769104003906, -11.003150939941406, 35.81093978881836, 21.944679260253906, 90.63676452636719, 12.253738403320312, 23.631290435791016, 17.89532470703125, 32.00379943847656, 39.02496337890625, 55.03802490234375, 23.79041862487793, 11.64678955078125, 68.695068359375, 30.78814697265625, -5.877647399902344, 26.353029251098633, 37.99170684814453, 86.03132629394531, 11.105325698852539, 5.085262298583984, 51.88081359863281, 2.6864395141601562, 41.307159423828125, 4.320219039916992, 51.86424255371094, 26.118988037109375, 13.40267562866211, 30.207794189453125, 22.88182830810547, 17.512939453125, 61.236663818359375, 4.149343490600586, 74.6280517578125, 71.006591796875, 33.18511199951172, -0.3515281677246094, 9.034963607788086, 69.26277160644531, 53.99907684326172, 25.362258911132812, -4.808513641357422, 16.2457275390625, 35.029754638671875, 83.60218811035156, 37.90618896484375, 1.1440696716308594, 19.945510864257812, -0.11328887939453125, 0.05048942565917969, 86.66746520996094, 11.933748245239258, 84.8057861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000644.npy"} +{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 28.9981746673584, "std": 28.357072830200195, "min": -17.012168884277344, "p10": -0.7628366470336914, "median": 25.833885192871094, "p90": 66.13080673217776, "max": 104.31535339355469, "pos_frac": 0.859375, "sample": [19.944427490234375, 43.01866149902344, 1.093048095703125, 26.764816284179688, 2.086517333984375, 4.8134613037109375, 44.01649475097656, 33.03233337402344, 51.73796463012695, 4.443883895874023, 21.173015594482422, 25.181961059570312, 32.04528045654297, -5.722568511962891, 19.128292083740234, 21.8900146484375, 6.56744384765625, 30.72770881652832, 104.31535339355469, 13.524076461791992, 27.24357032775879, 29.897483825683594, 53.915367126464844, 29.91211700439453, 19.961769104003906, 43.5105094909668, 18.871841430664062, 5.790863037109375, -17.012168884277344, -5.1527252197265625, -0.7767314910888672, 0.3074302673339844, -0.7304153442382812, 27.4237060546875, 99.97713470458984, 12.025299072265625, 45.408203125, 68.18850708007812, 13.475635528564453, 10.194549560546875, 103.62504577636719, 36.891265869140625, -4.222450256347656, 57.05281448364258, -0.6235504150390625, 73.43911743164062, 50.04789733886719, 56.60093688964844, 28.80426788330078, 24.149391174316406, 39.958770751953125, 2.5384674072265625, 61.329505920410156, 10.6619873046875, 77.20916748046875, 95.571044921875, 26.485809326171875, 39.27093505859375, -11.2691650390625, 49.14218521118164, 24.604888916015625, 5.297657012939453, 35.36286544799805, -8.25973129272461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000645.npy"} +{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 33.072837829589844, "std": 22.5963191986084, "min": -12.495393753051758, "p10": 8.701551055908203, "median": 33.394636154174805, "p90": 62.8078582763672, "max": 91.25164031982422, "pos_frac": 0.953125, "sample": [91.25164031982422, 8.597389221191406, 51.105812072753906, 59.185943603515625, 28.235763549804688, 15.192554473876953, 64.360107421875, 48.4322509765625, 89.23440551757812, 8.944595336914062, 34.734100341796875, 45.70033264160156, 14.641448974609375, 11.703155517578125, 34.877479553222656, 52.893280029296875, 43.19905471801758, 13.426483154296875, 55.193275451660156, 36.45958709716797, 44.75624084472656, 44.9754638671875, 9.252891540527344, 28.162073135375977, -7.789737701416016, 57.667884826660156, 54.83439636230469, 51.426170349121094, 1.9407901763916016, 32.58181381225586, 21.727619171142578, 36.849143981933594, 21.904144287109375, 20.394363403320312, 19.47875213623047, 43.98014831542969, 14.77978515625, 34.35588073730469, 39.564369201660156, 7.670688629150391, 22.882171630859375, 70.58526611328125, 22.125885009765625, -9.081493377685547, 22.142852783203125, 13.788238525390625, 42.43586730957031, 13.570343017578125, 34.20745849609375, 74.76348876953125, 54.122161865234375, 20.067115783691406, 27.413394927978516, 4.3837127685546875, 67.71541595458984, 20.818950653076172, 40.579437255859375, 10.530906677246094, 37.35343551635742, 24.321136474609375, -12.495393753051758, 71.87098693847656, 19.434814453125, 37.24394989013672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000646.npy"} +{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 32.761592864990234, "std": 27.287033081054688, "min": -16.99725341796875, "p10": -4.631001281738279, "median": 28.941256523132324, "p90": 70.33505477905274, "max": 95.20999145507812, "pos_frac": 0.875, "sample": [49.06217956542969, 52.08861541748047, 40.0816650390625, 45.703338623046875, 46.42595672607422, 44.186161041259766, 61.4400634765625, 54.377593994140625, 35.94007110595703, 15.982986450195312, 63.404380798339844, 25.532241821289062, 7.0754241943359375, 28.374366760253906, 95.20999145507812, 45.381385803222656, 15.015396118164062, 89.14452362060547, 43.91541290283203, 33.63813781738281, 15.7730712890625, 3.758880615234375, 28.98274803161621, 8.891036987304688, 52.84465026855469, 78.8379135131836, -2.9851531982421875, 37.412574768066406, -16.54755401611328, 71.2988510131836, 38.051048278808594, 5.740093231201172, 20.88709259033203, 41.534446716308594, 68.1891098022461, 24.779632568359375, 26.694442749023438, 39.55451965332031, -5.33636474609375, -16.99725341796875, 20.410789489746094, 0.9617843627929688, 87.26898956298828, 3.575469970703125, -14.076923370361328, 25.284841537475586, 51.63945770263672, 28.899765014648438, 22.89719009399414, 66.6187744140625, 13.941638946533203, 26.08111000061035, 71.25474548339844, 61.627586364746094, 84.17713928222656, 15.239944458007812, -7.992607116699219, 24.279464721679688, -12.003795623779297, 28.227462768554688, 30.608901977539062, 34.250518798828125, -5.357387542724609, 25.583433151245117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000647.npy"} +{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 40.340911865234375, "std": 34.430606842041016, "min": -10.056533813476562, "p10": 1.6166843414306662, "median": 36.866031646728516, "p90": 83.13314590454102, "max": 146.689453125, "pos_frac": 0.921875, "sample": [28.312332153320312, 0.6949691772460938, 19.258987426757812, 81.03878021240234, 12.219524383544922, 61.309120178222656, 114.3490219116211, 146.689453125, 47.91321563720703, 43.05680847167969, 14.908985137939453, 26.87449073791504, 25.967681884765625, 55.706626892089844, 12.851776123046875, 6.4312591552734375, 48.33213806152344, 53.47084045410156, 56.90058898925781, -2.1821060180664062, -4.195047378540039, 41.58910369873047, 23.87213706970215, 28.465835571289062, 37.99397277832031, 52.910400390625, 63.24946594238281, 38.151329040527344, 34.57927703857422, 30.460552215576172, 66.68695068359375, -10.056533813476562, 35.73809051513672, 108.95175170898438, 10.463333129882812, -3.909027099609375, 17.451335906982422, 40.98526382446289, 68.58953857421875, 0.7718505859375, 22.0535888671875, 71.08155059814453, 69.63703918457031, 13.766983032226562, 9.281700134277344, 93.62628173828125, 132.2804412841797, 10.093536376953125, 10.4326171875, 22.630464553833008, 66.05247497558594, 51.02910614013672, 6.2292633056640625, 6.162143707275391, 88.75128173828125, 3.587963104248047, 70.5928955078125, 63.01202392578125, 7.88017463684082, 41.63531494140625, 40.52414321899414, 68.58592224121094, -7.993316650390625, 84.03073120117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000648.npy"} +{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 28.49066925048828, "std": 28.495798110961914, "min": -16.786928176879883, "p10": -0.9296592712402343, "median": 19.965651512145996, "p90": 69.19300918579101, "max": 118.7969970703125, "pos_frac": 0.875, "sample": [10.917362213134766, 44.05290985107422, 46.377708435058594, -0.9684123992919922, 68.15569305419922, 23.375686645507812, 10.733016967773438, 25.962631225585938, 17.149505615234375, 18.08948516845703, 77.10577392578125, -4.829071044921875, 14.299549102783203, 10.485214233398438, 20.059425354003906, -4.500335693359375, 8.48370361328125, 66.1181640625, -6.6219329833984375, 50.353492736816406, 14.944435119628906, 1.9214649200439453, -16.786928176879883, 1.7001094818115234, 19.871877670288086, -0.8392353057861328, 65.88630676269531, 17.324493408203125, 4.493343353271484, 44.331298828125, 59.84994888305664, 4.8662261962890625, -1.2538528442382812, 13.897369384765625, 78.29812622070312, 36.18733215332031, 55.062469482421875, 46.05284881591797, 35.5467529296875, 51.087615966796875, 72.36886596679688, 24.83319664001465, 3.2973194122314453, 94.81436157226562, 27.99329376220703, 3.9409751892089844, 54.71417236328125, 118.7969970703125, 28.08935546875, 7.002372741699219, 2.4626941680908203, 20.921707153320312, 21.470993041992188, 69.6375732421875, 15.483322143554688, 83.54179382324219, 55.057586669921875, -4.971149444580078, 5.497959136962891, 13.658601760864258, 8.058914184570312, 18.5203857421875, 21.989646911621094, 28.980270385742188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000649.npy"} +{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 36.623985290527344, "std": 29.48505973815918, "min": -12.738967895507812, "p10": 2.9937297821044924, "median": 32.330135345458984, "p90": 77.70841522216797, "max": 113.77725219726562, "pos_frac": 0.953125, "sample": [-10.362449645996094, 2.8108139038085938, 42.334938049316406, 82.5655517578125, 2.7720718383789062, 71.33522033691406, 44.33701705932617, 18.459392547607422, 70.05760955810547, 26.75206184387207, 43.80570983886719, -4.8383331298828125, 101.5274429321289, 32.11158752441406, 74.76170349121094, 35.594940185546875, 42.41120147705078, 72.61518859863281, 33.281917572021484, 81.59378051757812, 78.6275634765625, 33.631103515625, 71.1939697265625, 56.4420166015625, 19.451669692993164, 21.87466049194336, 2.70513916015625, 9.971153259277344, 17.381561279296875, 17.28490447998047, 80.46371459960938, 41.27776336669922, -12.738967895507812, 22.260787963867188, 14.892107009887695, 113.77725219726562, 10.965997695922852, 113.67018127441406, 2.9330406188964844, 32.96625518798828, 10.005876541137695, 38.33753204345703, 3.1353378295898438, 50.37992858886719, 32.548683166503906, 58.768829345703125, 41.173561096191406, 22.22917938232422, 24.821807861328125, 75.56373596191406, 52.27423095703125, 23.88433837890625, 52.99449157714844, 58.31261444091797, 13.10427474975586, 14.326005935668945, 11.680694580078125, 5.917926788330078, 7.30828857421875, 26.339813232421875, 44.37939453125, 9.586570739746094, 23.936588287353516, 29.965980529785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000650.npy"} +{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 35.643516540527344, "std": 26.986141204833984, "min": -6.048469543457031, "p10": 4.484818077087402, "median": 28.83005142211914, "p90": 80.8577133178711, "max": 94.82373046875, "pos_frac": 0.953125, "sample": [94.82373046875, 61.201332092285156, 22.871139526367188, 85.4984130859375, 23.196273803710938, 8.410514831542969, 29.92010498046875, 68.74010467529297, 32.42186737060547, 20.603302001953125, 26.53234100341797, 57.124908447265625, 10.511323928833008, 26.05823516845703, 42.730224609375, 38.19728088378906, 11.80642318725586, 15.483100891113281, 27.383073806762695, 55.98246765136719, 23.06855010986328, -6.048469543457031, 65.1690902709961, 49.835777282714844, 81.16461181640625, 15.539802551269531, 7.480524063110352, -2.3121414184570312, 30.21392822265625, 17.360305786132812, 33.487510681152344, 4.4465484619140625, 22.453285217285156, -2.368040084838867, 74.70315551757812, 23.301799774169922, 61.84797668457031, 40.53917694091797, 72.1787109375, 4.574113845825195, 38.55926513671875, 47.72148513793945, 2.1356868743896484, 11.625722885131836, 0.30608367919921875, 80.14161682128906, 21.940105438232422, 84.61639404296875, 2.259387969970703, 37.07838439941406, 8.447412490844727, 17.47343635559082, 87.02448272705078, 42.28952407836914, 55.705467224121094, 16.700504302978516, 44.17849349975586, 15.145530700683594, 82.31680297851562, 32.592987060546875, 27.73999786376953, 42.83937454223633, 90.03146362304688, 16.18314552307129], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000651.npy"} +{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 35.44752502441406, "std": 28.201440811157227, "min": -4.27130126953125, "p10": 3.2466537475585966, "median": 27.82210922241211, "p90": 75.14475708007812, "max": 107.83541870117188, "pos_frac": 0.9375, "sample": [11.198535919189453, 17.693695068359375, 60.507843017578125, 10.732398986816406, -0.7998466491699219, 46.638004302978516, 39.158843994140625, 18.255889892578125, 70.30517578125, 39.207557678222656, 82.05863189697266, 61.66028594970703, 87.32545471191406, 11.959671020507812, 0.521453857421875, 17.00870132446289, 7.78155517578125, 75.30938720703125, 94.23724365234375, 1.7922916412353516, 13.535964965820312, -2.298971176147461, 28.149246215820312, 24.810806274414062, 46.35425567626953, -4.27130126953125, 27.494972229003906, 18.849273681640625, 16.674909591674805, 16.17479705810547, 26.118488311767578, 107.83541870117188, 19.81201934814453, 86.12834930419922, 42.20085906982422, 46.186492919921875, 32.98316955566406, 18.729339599609375, -2.5036678314208984, 9.730377197265625, 10.246780395507812, 25.8106689453125, 8.933338165283203, 13.836368560791016, 67.68098449707031, 56.90118408203125, 62.47344970703125, 10.003105163574219, 60.09941101074219, 21.224899291992188, 74.34646606445312, 11.42098617553711, 89.96795654296875, 40.92372131347656, 32.30295181274414, 32.66666793823242, 74.7606201171875, 2.0553741455078125, 49.54877853393555, 34.3770751953125, 52.45616149902344, 39.38974380493164, 65.94100952148438, 6.02630615234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000652.npy"} +{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 35.66375732421875, "std": 25.90984344482422, "min": -11.282958984375, "p10": 6.123506927490235, "median": 35.06632423400879, "p90": 75.8134521484375, "max": 112.09405517578125, "pos_frac": 0.9375, "sample": [73.79341125488281, 7.85321044921875, 5.118940353393555, 88.32197570800781, 52.53143310546875, 15.12445068359375, 16.97021484375, -11.282958984375, -6.360908508300781, 34.251953125, 88.68368530273438, 6.843223571777344, -10.267250061035156, 66.31001281738281, 45.812660217285156, 26.891448974609375, 56.655120849609375, 41.89256286621094, 57.903594970703125, 58.23634338378906, -2.318845748901367, 35.1845588684082, 39.9277229309082, 29.1947021484375, 46.06732940673828, 24.092056274414062, 46.48444366455078, 44.008567810058594, 21.48150634765625, 40.64929962158203, 84.57237243652344, 29.805801391601562, 77.69493865966797, 20.434097290039062, 34.948089599609375, 36.89263916015625, 5.870567321777344, 43.691688537597656, 40.936363220214844, 45.77460479736328, 112.09405517578125, 26.277130126953125, 1.3166389465332031, 10.499837875366211, 84.16433715820312, 42.71320343017578, 10.691146850585938, 32.368011474609375, 35.44813537597656, 16.58573341369629, 27.161209106445312, 20.19108009338379, 76.67918395996094, 38.45478820800781, 27.233579635620117, 37.69002914428711, 6.7136993408203125, 11.789710998535156, 47.57135009765625, 41.482810974121094, 55.34442901611328, 15.058586120605469, 21.483795166015625, 22.792465209960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000653.npy"} +{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 29.483509063720703, "std": 28.891340255737305, "min": -21.131942749023438, "p10": -2.0206186294555653, "median": 24.253183364868164, "p90": 64.92828521728516, "max": 145.5150146484375, "pos_frac": 0.859375, "sample": [6.778800964355469, 81.03443908691406, 15.702713012695312, 25.61902618408203, 23.289749145507812, 15.739112854003906, -14.310951232910156, 17.867889404296875, 10.086727142333984, 63.82734680175781, 41.0898323059082, 33.226539611816406, 25.65684700012207, 25.586776733398438, 27.495349884033203, -11.090950012207031, 22.604408264160156, 30.352508544921875, 33.482669830322266, 49.02809524536133, 32.201873779296875, 145.5150146484375, 19.720138549804688, 17.696258544921875, 19.09040069580078, 53.63215637207031, -0.6386260986328125, 21.058704376220703, 21.660118103027344, -0.978668212890625, 20.62261199951172, 45.67593002319336, 56.49286651611328, 49.08349609375, 18.688262939453125, 20.018043518066406, 47.340911865234375, -2.709320068359375, 1.9641437530517578, 65.40011596679688, 59.55149841308594, 25.216617584228516, 43.230987548828125, 8.852499008178711, -11.653125762939453, 69.26644134521484, 27.743133544921875, 65.69364929199219, 81.93960571289062, 12.92569351196289, 34.735260009765625, 50.895355224609375, 35.508575439453125, 19.096702575683594, 3.6739578247070312, -2.4671688079833984, 44.722267150878906, -6.694028854370117, 106.62451171875, 26.594345092773438, 20.434356689453125, 13.0863037109375, -21.131942749023438, 4.49761962890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000654.npy"} +{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 30.53487205505371, "std": 29.86149024963379, "min": -13.743030548095703, "p10": -0.8515663146972655, "median": 25.702972412109375, "p90": 74.79225845336914, "max": 125.78433227539062, "pos_frac": 0.875, "sample": [25.785194396972656, 21.29440689086914, 81.71101379394531, 27.568376541137695, -2.7770862579345703, 87.37763977050781, -0.9015655517578125, 68.20458984375, 3.7334518432617188, 14.107681274414062, 38.00923156738281, 3.710874557495117, 10.627670288085938, -13.743030548095703, 26.25397300720215, 10.189308166503906, 23.04004669189453, 125.78433227539062, 10.839729309082031, 47.9996337890625, 31.584991455078125, 50.52185821533203, 92.79867553710938, 50.10456085205078, 1.8926620483398438, 15.490470886230469, 48.263938903808594, 43.112701416015625, -13.435806274414062, 13.564453125, -2.545602798461914, 75.52206420898438, 41.242431640625, -0.7349014282226562, 47.687835693359375, 36.0679931640625, 5.595478057861328, 35.98677062988281, 48.07440185546875, 33.45543670654297, 3.109455108642578, 20.084747314453125, 31.76105499267578, 33.433815002441406, 80.18943786621094, 8.16229248046875, 25.620750427246094, 0.9679718017578125, 43.94892120361328, 33.83795166015625, -3.3311614990234375, 7.3656463623046875, 27.873291015625, 22.231693267822266, 73.0893783569336, -5.726711273193359, 109.90081787109375, 3.772216796875, 10.376937866210938, 65.69779968261719, 46.797325134277344, 21.900543212890625, 12.012981414794922, 18.088760375976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000655.npy"} +{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 41.93996047973633, "std": 26.170562744140625, "min": -9.511810302734375, "p10": 6.463903427124024, "median": 39.751220703125, "p90": 70.3494758605957, "max": 120.03076171875, "pos_frac": 0.953125, "sample": [81.0235824584961, 39.28685760498047, 28.42902374267578, -1.2963066101074219, 16.93561553955078, 66.76351165771484, 7.260837554931641, 55.750946044921875, 37.47069549560547, 66.63402557373047, 6.1223602294921875, 42.05564880371094, 54.85917663574219, 54.672149658203125, 18.297983169555664, 51.34449005126953, 11.168228149414062, 25.156784057617188, 19.727188110351562, 34.08224868774414, 36.47289276123047, 64.52606201171875, 59.043922424316406, 3.102874755859375, -9.511810302734375, 28.31359100341797, 49.66932678222656, 63.20996856689453, 43.16044616699219, 96.90225219726562, 31.854869842529297, 66.12255859375, 62.558860778808594, -1.0611133575439453, 68.70006561279297, 52.83412170410156, 61.987815856933594, 51.1304931640625, 62.193695068359375, 0.46484375, 87.77552032470703, 51.16645812988281, 72.48473358154297, 71.05636596679688, 8.724355697631836, 81.70339965820312, 39.19808578491211, 120.03076171875, 33.39909362792969, 56.159461975097656, 36.82001495361328, 36.00170135498047, 20.782899856567383, 5.8512115478515625, 39.42543029785156, 43.41869354248047, 17.1392822265625, 23.68358612060547, 64.02680206298828, 11.367225646972656, 40.07701110839844, 38.305686950683594, 22.296600341796875, 55.842315673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000656.npy"} +{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 37.42141342163086, "std": 28.833955764770508, "min": -21.648757934570312, "p10": 5.915207290649415, "median": 34.29959487915039, "p90": 77.25778350830079, "max": 121.79554748535156, "pos_frac": 0.96875, "sample": [24.450439453125, 2.140716552734375, 18.760421752929688, 52.613922119140625, 78.18572998046875, 121.05000305175781, 38.99640655517578, 25.108612060546875, 56.10050964355469, 7.451530456542969, -18.421417236328125, 29.11456298828125, 24.189056396484375, 17.573776245117188, 34.676239013671875, 48.78587341308594, 2.5128631591796875, 49.88313293457031, 33.922950744628906, 75.09257507324219, 13.058914184570312, 54.89030838012695, 40.106666564941406, 90.96466064453125, 29.666358947753906, 78.67512512207031, 20.15482521057129, 48.154869079589844, 15.072181701660156, 5.4716033935546875, 6.950283050537109, 33.343875885009766, 5.414882659912109, 10.772331237792969, 32.39950942993164, 55.79225158691406, 48.50941467285156, 17.41461944580078, 24.245574951171875, 10.970312118530273, 43.73652648925781, 53.418731689453125, 3.7670116424560547, 46.952552795410156, 90.26817321777344, 18.75218391418457, 48.45904541015625, 51.375396728515625, 35.88007354736328, 29.72454833984375, 98.24415588378906, 52.382781982421875, 37.014556884765625, 69.51841735839844, 29.175338745117188, 11.048858642578125, 54.286231994628906, 121.79554748535156, 44.071075439453125, -21.648757934570312, 37.11573028564453, 11.34184455871582, 24.16252899169922, 39.91143798828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000657.npy"} +{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 35.11966323852539, "std": 30.29734992980957, "min": -23.62458038330078, "p10": 1.6117561340332032, "median": 30.515384674072266, "p90": 72.28136901855471, "max": 136.17062377929688, "pos_frac": 0.921875, "sample": [29.41375732421875, 6.457366943359375, 11.306793212890625, -12.360847473144531, 1.0215644836425781, 38.93533706665039, 61.34008026123047, 26.94791030883789, 30.706344604492188, 58.77239990234375, 13.844554901123047, 136.17062377929688, 30.324424743652344, 74.58370971679688, 26.413116455078125, 55.248077392578125, 17.98172378540039, 32.57819366455078, 9.004379272460938, -6.214962005615234, 15.3870849609375, 13.325447082519531, 43.72621154785156, 34.459197998046875, 18.16510772705078, 31.258224487304688, 66.90924072265625, 44.340545654296875, 81.01150512695312, -23.62458038330078, 1.6296844482421875, 17.632535934448242, 21.95378875732422, 74.78443908691406, 13.77569580078125, 100.36863708496094, 6.432914733886719, 83.94374084472656, 17.73760223388672, 10.673826217651367, 51.03964614868164, 45.659400939941406, 63.5601806640625, 56.06007385253906, 51.35646057128906, 18.314224243164062, 47.828460693359375, 1.6040725708007812, 59.164886474609375, 57.45649719238281, 35.24303436279297, 40.2652473449707, -8.63694953918457, 57.469505310058594, 25.242870330810547, 46.60869598388672, 64.83526611328125, 109.90859985351562, 1.8846206665039062, 45.802093505859375, 23.016006469726562, 23.72930145263672, -9.007911682128906, 22.88880157470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000658.npy"} +{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 34.305973052978516, "std": 29.324827194213867, "min": -21.34691619873047, "p10": -1.4792613983154295, "median": 35.50633239746094, "p90": 66.81596527099609, "max": 112.21209716796875, "pos_frac": 0.84375, "sample": [20.77475929260254, -1.1366348266601562, 8.17416763305664, 42.63790512084961, 10.804573059082031, 37.89310073852539, 112.21209716796875, 61.311485290527344, 0.9988079071044922, 11.840476989746094, 100.03646850585938, -8.741357803344727, 44.3973388671875, 1.3974609375, 65.4127197265625, 20.25421142578125, 37.78466796875, 28.154064178466797, 39.918212890625, 17.761886596679688, 22.778013229370117, -21.34691619873047, -0.043704986572265625, 66.46029663085938, 40.018531799316406, 79.5093994140625, 62.21558380126953, 50.768211364746094, 63.100372314453125, 78.327392578125, 59.158966064453125, 27.477163314819336, -1.3005123138427734, 12.942901611328125, 51.12577819824219, 50.748748779296875, 66.48716735839844, 16.539783477783203, 11.15020751953125, 47.13395690917969, 33.41608810424805, 11.057167053222656, 58.221473693847656, -1.555868148803711, 69.26100158691406, -7.556480407714844, 4.636817932128906, 31.56134033203125, 52.72504806518555, 87.04863739013672, 64.95289611816406, 25.47867774963379, -1.5913772583007812, 42.025962829589844, 34.508785247802734, 19.36505126953125, 66.15686798095703, 36.50387954711914, 66.95687866210938, -10.986465454101562, 40.34466552734375, 38.23970031738281, -7.962211608886719, 7.636016845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000659.npy"} +{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 37.83965301513672, "std": 27.794391632080078, "min": -5.1110687255859375, "p10": 3.416747856140137, "median": 35.600074768066406, "p90": 66.91301651000977, "max": 127.02607727050781, "pos_frac": 0.921875, "sample": [44.50879669189453, 44.09132385253906, 21.63726806640625, 45.500450134277344, -1.623321533203125, 66.40193939208984, 89.81242370605469, -2.0670394897460938, 19.970314025878906, 58.40804672241211, 18.991613388061523, 37.72049331665039, 43.914772033691406, 53.48753356933594, 29.752761840820312, 68.63004302978516, 34.12480163574219, -5.1110687255859375, 53.64577102661133, 13.350799560546875, 29.581329345703125, 22.047760009765625, 55.394752502441406, 42.635032653808594, 63.86659240722656, 99.6961898803711, -2.966634750366211, 12.927316665649414, 69.968994140625, 61.518272399902344, 19.94024658203125, 34.59437561035156, 19.930809020996094, 46.639190673828125, 32.123504638671875, 61.32752990722656, 54.85259246826172, 54.41893005371094, 49.06664276123047, 20.763412475585938, 3.3418617248535156, 21.892597198486328, 38.245086669921875, 41.803977966308594, 32.141456604003906, 12.849334716796875, 3.591482162475586, 122.27113342285156, 127.02607727050781, 18.638038635253906, 29.539535522460938, 52.53990936279297, 22.831378936767578, 67.13204956054688, 57.64073181152344, -1.7434844970703125, 22.464401245117188, 36.60577392578125, 20.261932373046875, 51.24694061279297, 11.663711547851562, 1.4299201965332031, 4.245294570922852, 40.60395431518555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000660.npy"} +{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 34.966941833496094, "std": 30.865558624267578, "min": -28.07427978515625, "p10": 3.4970308303833018, "median": 27.793622970581055, "p90": 73.81653900146485, "max": 148.329345703125, "pos_frac": 0.921875, "sample": [29.504146575927734, 0.24277305603027344, 66.75566101074219, 24.04169273376465, 36.73855972290039, 27.586688995361328, 36.4174919128418, -9.479705810546875, 61.32965087890625, 73.19606018066406, 37.063316345214844, 22.353195190429688, 14.965965270996094, 59.9984130859375, 148.329345703125, 44.673118591308594, 31.260086059570312, 19.683128356933594, -0.8605194091796875, 22.749048233032227, 58.214420318603516, 4.374835968017578, 16.52547836303711, 5.866939544677734, 3.120828628540039, 68.76284790039062, 15.081886291503906, 75.92509460449219, 77.76904296875, 9.486007690429688, -13.008193969726562, 28.572731018066406, 18.42108154296875, 79.54273986816406, 47.242652893066406, 7.70536994934082, 13.344703674316406, 15.532371520996094, 22.634130477905273, 22.96038818359375, 6.051794052124023, 27.304901123046875, 22.991058349609375, 80.96385955810547, 45.18391418457031, -12.206161499023438, 66.99563598632812, 26.672279357910156, 17.771709442138672, 55.55959701538086, 74.08245849609375, -28.07427978515625, 17.707656860351562, 7.468400955200195, 58.38568878173828, 59.839805603027344, 26.022024154663086, 66.13803100585938, 37.03266143798828, 114.63467407226562, 31.425640106201172, 43.86045837402344, 39.448394775390625, 28.00055694580078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000661.npy"} +{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 30.80561065673828, "std": 25.711318969726562, "min": -15.023521423339844, "p10": 6.280214881896973, "median": 23.169673919677734, "p90": 65.84209289550782, "max": 109.64275360107422, "pos_frac": 0.953125, "sample": [11.733966827392578, 41.98394012451172, 57.05699157714844, -11.345226287841797, 63.846778869628906, 32.952659606933594, 69.33949279785156, -15.023521423339844, 45.80635452270508, 23.141845703125, 31.94928741455078, 19.62152862548828, 8.790756225585938, 25.548553466796875, 26.389617919921875, 19.998794555664062, 5.751752853393555, 14.501888275146484, 88.54264068603516, 33.55714416503906, 66.69722747802734, 22.809585571289062, 32.04267120361328, 32.82033157348633, 12.1256103515625, 22.98406982421875, 30.717403411865234, 51.46385192871094, 13.742630004882812, 18.573211669921875, 13.904350280761719, 8.818456649780273, 11.813430786132812, 7.295492172241211, 55.355865478515625, 40.84754943847656, 14.792831420898438, 26.54998016357422, 44.81366729736328, 23.19750213623047, 17.587356567382812, 43.11572265625, 78.69090270996094, 80.7860107421875, 41.43450927734375, 22.210609436035156, 16.402671813964844, 19.888072967529297, 30.725006103515625, 35.20684814453125, 19.249135971069336, 0.17183303833007812, 18.785903930664062, 14.490955352783203, 6.668663024902344, 6.113737106323242, 107.50247192382812, 109.64275360107422, 41.510528564453125, 54.62067413330078, 49.763633728027344, 0.24335479736328125, 14.62811279296875, -3.3913116455078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000662.npy"} +{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 33.04689407348633, "std": 24.61198616027832, "min": -21.303390502929688, "p10": 6.674959945678711, "median": 30.33756732940674, "p90": 61.595861053466805, "max": 123.40316772460938, "pos_frac": 0.984375, "sample": [50.751808166503906, 37.72486877441406, 56.12176513671875, 15.02204704284668, 46.327735900878906, 32.561187744140625, 6.684276580810547, 31.46942138671875, 9.541961669921875, 7.010688781738281, 30.43839454650879, 6.670967102050781, 50.518409729003906, 50.765380859375, 25.055587768554688, 35.999725341796875, 30.236740112304688, 48.65229034423828, 10.433982849121094, 17.6851806640625, 36.27063751220703, 15.893508911132812, 13.591808319091797, 20.723121643066406, 63.91242218017578, 54.48828125, 54.53416442871094, 26.850296020507812, 64.28158569335938, 41.23345947265625, 14.044536590576172, 79.56878662109375, 89.17645263671875, -21.303390502929688, 4.787872314453125, 44.62434387207031, 41.88343048095703, 21.31058120727539, 17.5130558013916, 62.493446350097656, 59.501495361328125, 70.55802917480469, 56.03948211669922, 15.156196594238281, 24.098419189453125, 36.97746276855469, 15.987239837646484, 28.287704467773438, 17.701955795288086, 123.40316772460938, 56.91699981689453, 55.937255859375, 46.29176330566406, 4.419921875, 22.024444580078125, 10.660324096679688, 4.709201812744141, 5.30177116394043, 41.00181579589844, 7.616462707519531, 5.386695861816406, 36.65898895263672, 9.94842529296875, 14.865310668945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000663.npy"} +{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 37.14731979370117, "std": 27.36865997314453, "min": -51.943485260009766, "p10": 4.58526954650879, "median": 37.534637451171875, "p90": 69.7172103881836, "max": 95.9025650024414, "pos_frac": 0.9375, "sample": [71.2774658203125, 33.67870330810547, 11.702556610107422, 79.50355529785156, 54.7274169921875, 61.48046875, 41.25830078125, 81.57588958740234, 41.837127685546875, 60.506256103515625, -0.6601715087890625, 59.13848876953125, 54.697784423828125, 16.090913772583008, 91.85424041748047, 33.02830505371094, 34.602447509765625, 64.04249572753906, 19.750244140625, 66.07661437988281, 56.44389343261719, 46.688018798828125, 13.638107299804688, 60.778045654296875, 4.234149932861328, 12.210334777832031, 51.61668014526367, 13.354377746582031, 6.1572418212890625, 25.56634521484375, 5.404548645019531, 54.01951217651367, 80.87254333496094, 26.722007751464844, 23.392425537109375, 53.96672821044922, -51.943485260009766, 12.231010437011719, 48.663368225097656, 53.972412109375, -9.992225646972656, 27.317169189453125, 30.33941650390625, 48.18505859375, 39.17327880859375, 55.332000732421875, 29.932830810546875, 28.4940128326416, 91.09439086914062, 18.14574432373047, 3.5317916870117188, 31.686859130859375, 18.054885864257812, 12.775291442871094, 50.212249755859375, 52.46087646484375, 45.51249694824219, -5.958271026611328, 38.304534912109375, 23.769298553466797, 38.2525634765625, 36.81671142578125, 95.9025650024414, 3.9277172088623047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000664.npy"} +{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 31.53219223022461, "std": 23.78449058532715, "min": -7.907407760620117, "p10": 3.0599838256835956, "median": 31.692195892333984, "p90": 58.89931678771973, "max": 120.5572509765625, "pos_frac": 0.9375, "sample": [2.2639694213867188, 58.674198150634766, 17.424564361572266, 21.457252502441406, 37.27435302734375, 37.71638488769531, 32.37676239013672, 14.8111572265625, 34.40840148925781, 50.36427307128906, 13.236419677734375, 23.388587951660156, 7.691802978515625, 70.0084228515625, 14.482887268066406, 19.45741844177246, 1.5891876220703125, 56.01002502441406, 5.872947692871094, 32.89390563964844, 38.99017333984375, 40.586585998535156, 40.857566833496094, 55.01665496826172, -7.907407760620117, 33.97332000732422, 15.531036376953125, 36.07660675048828, 64.49698638916016, 8.892597198486328, 4.917350769042969, 22.356477737426758, 47.84483337402344, 58.54732894897461, 44.685821533203125, 14.083234786987305, 36.036277770996094, 43.44319152832031, 29.93743133544922, 11.964393615722656, 1.8732185363769531, 44.49517059326172, 85.96533966064453, -0.7871246337890625, 38.10545349121094, 60.05635070800781, -0.8560810089111328, 22.41606903076172, 58.99579620361328, 39.616493225097656, 6.573295593261719, 16.657764434814453, 47.51548385620117, 9.133087158203125, 62.77714538574219, 26.410789489746094, 10.464035034179688, 15.995269775390625, 51.98797607421875, 54.600257873535156, 120.5572509765625, -7.1396636962890625, 31.00762939453125, 29.905925750732422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000665.npy"} +{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 35.28595733642578, "std": 26.626768112182617, "min": -26.87394905090332, "p10": 2.087860488891603, "median": 35.888084411621094, "p90": 75.0630470275879, "max": 90.63648986816406, "pos_frac": 0.90625, "sample": [44.839324951171875, 22.905975341796875, 46.05503845214844, 21.683162689208984, -5.096784591674805, 63.54938507080078, 46.364173889160156, 42.03068542480469, 24.461563110351562, 80.39794921875, -0.9208145141601562, 9.692718505859375, 1.48321533203125, 8.254257202148438, 27.144195556640625, 37.531394958496094, 85.04193115234375, 23.635231018066406, 14.540218353271484, 52.28591537475586, 74.37554931640625, 35.48951721191406, 19.548349380493164, 36.286651611328125, 90.63648986816406, -4.76654052734375, 40.070411682128906, 3.498699188232422, 19.538047790527344, -23.28448486328125, 47.607666015625, 56.46485900878906, 25.51759910583496, 75.3576889038086, 49.95048522949219, 38.0478515625, 57.12732696533203, 11.97137451171875, 29.991294860839844, 82.01020812988281, 13.707927703857422, 87.15619659423828, -26.87394905090332, 77.5186538696289, 13.101211547851562, 46.70968246459961, -4.145145416259766, 61.917877197265625, 24.503170013427734, 23.944137573242188, 53.59516906738281, 40.81711196899414, 19.71270751953125, 22.089466094970703, 18.168052673339844, 27.794189453125, 20.632938385009766, 52.69661331176758, 58.66841125488281, 39.72528839111328, 12.77154541015625, 58.828887939453125, 60.816184997558594, 43.12702178955078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000666.npy"} +{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 34.412696838378906, "std": 24.72762107849121, "min": -16.377269744873047, "p10": 5.710640716552735, "median": 33.06393814086914, "p90": 61.63188552856446, "max": 106.17068481445312, "pos_frac": 0.96875, "sample": [17.188339233398438, 14.521232604980469, 19.55269432067871, -16.377269744873047, 13.372665405273438, 47.291221618652344, 24.532135009765625, 43.20178985595703, 40.656158447265625, 9.9954833984375, 4.810380935668945, 3.4813995361328125, 20.945274353027344, 43.51170349121094, 26.501928329467773, 59.08440399169922, 51.40925598144531, 8.833106994628906, 84.54893493652344, 37.91765594482422, 22.29840087890625, 51.2312126159668, 51.56695556640625, 55.44764709472656, 50.01423645019531, 13.228828430175781, 29.56171989440918, 25.089759826660156, 52.514007568359375, 50.023780822753906, 22.98796844482422, 40.83863830566406, 106.17068481445312, 74.68832397460938, 47.606903076171875, -7.440765380859375, 4.1528167724609375, 14.761993408203125, 39.83141326904297, 45.56793212890625, 9.059036254882812, 58.25286865234375, 25.03594970703125, 5.5398101806640625, 79.13618469238281, 62.723663330078125, 73.61663818359375, 55.99565887451172, 45.38987731933594, 27.47265625, 5.4571990966796875, 47.25572204589844, 14.358749389648438, 90.22900390625, 34.11333084106445, 51.826454162597656, 32.48908233642578, 11.378395080566406, 33.6387939453125, 10.8719482421875, 7.246795654296875, 6.109245300292969, 51.78273010253906, 18.311866760253906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000667.npy"} +{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 33.14695358276367, "std": 26.5964298248291, "min": -19.394302368164062, "p10": 1.422550678253174, "median": 30.69057846069336, "p90": 70.86981430053713, "max": 98.8163070678711, "pos_frac": 0.921875, "sample": [58.12181091308594, 48.01048278808594, 83.78840637207031, -12.39471435546875, 6.124629974365234, 9.69082260131836, 64.430419921875, 15.164756774902344, 52.742828369140625, 30.05893325805664, 29.406585693359375, -6.8665924072265625, 23.46784210205078, 51.09329605102539, 79.41107177734375, 32.49830627441406, -10.509414672851562, 0.9705047607421875, 10.932548522949219, 21.260114669799805, 32.474464416503906, 41.12358856201172, 27.073341369628906, 26.902114868164062, 55.661781311035156, 30.617225646972656, 15.088455200195312, 3.585296630859375, 3.554779052734375, 39.325904846191406, 44.41907501220703, 59.78953552246094, 30.763931274414062, 30.846885681152344, 42.81797790527344, 45.63384246826172, -5.585456848144531, 86.05292510986328, 17.588546752929688, 1.9691524505615234, 49.07427978515625, 63.10607147216797, 73.62955474853516, 24.554407119750977, -19.394302368164062, 74.351318359375, 8.604400634765625, 15.967498779296875, 25.524051666259766, 7.026409149169922, 37.36659240722656, 30.473392486572266, 31.77813720703125, 50.702903747558594, 1.67767333984375, 25.201438903808594, 37.854286193847656, 58.98387908935547, 98.8163070678711, 54.12190246582031, 1.3132123947143555, 58.936981201171875, 17.17822265625, 77.45028686523438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000668.npy"} +{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 38.299198150634766, "std": 32.85043716430664, "min": -26.481550216674805, "p10": 0.7169198989868165, "median": 37.22895050048828, "p90": 73.90769729614259, "max": 132.2518310546875, "pos_frac": 0.90625, "sample": [42.2030029296875, 7.402458190917969, -14.723047256469727, 70.99903106689453, -26.481550216674805, 71.4336166381836, 22.8521728515625, 66.16065979003906, 15.065038681030273, 0.6639537811279297, 132.2518310546875, 9.34450912475586, 8.270843505859375, 39.73277282714844, -6.84735107421875, 98.23212432861328, 35.74810028076172, 14.707002639770508, 19.98846435546875, 50.85274124145508, 60.92657470703125, 33.32585906982422, 74.968017578125, 25.036590576171875, 64.34370422363281, 57.401512145996094, 45.11188507080078, 93.75920104980469, 56.426368713378906, 4.785541534423828, 63.33922576904297, 109.78250122070312, 49.35224914550781, 68.81027221679688, 87.23267364501953, -5.038972854614258, 5.104183197021484, 4.825065612792969, 95.21896362304688, 51.10259246826172, 20.044471740722656, 7.81768798828125, 28.973731994628906, 41.358821868896484, -20.0206298828125, 61.75653839111328, 11.425888061523438, 70.83598327636719, 38.709800720214844, 19.529346466064453, 28.102561950683594, 60.974735260009766, 12.624282836914062, 20.845348358154297, 28.029327392578125, 39.78507995605469, -7.8606109619140625, 64.63154602050781, 54.06210708618164, 61.64822006225586, 30.422595977783203, 29.11359405517578, 0.8405075073242188, 43.82734298706055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000669.npy"} +{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 39.020320892333984, "std": 26.109134674072266, "min": -7.844768524169922, "p10": 7.713055419921876, "median": 39.65853500366211, "p90": 74.46239013671875, "max": 96.96675109863281, "pos_frac": 0.953125, "sample": [31.647947311401367, 54.2125244140625, 60.318748474121094, 68.0026626586914, 27.082401275634766, 13.291109085083008, -3.1198348999023438, 10.50820541381836, 74.55767822265625, 51.36383056640625, 49.38691711425781, 12.206253051757812, -0.8127422332763672, 46.54058837890625, 63.36256408691406, 45.87446594238281, 24.295166015625, 9.842247009277344, 54.7659912109375, 14.029491424560547, 9.722415924072266, 10.311866760253906, 81.58500671386719, 22.35076904296875, 73.31355285644531, 43.095787048339844, 23.08767318725586, 20.431949615478516, 66.64122772216797, 15.46063232421875, 77.40589141845703, 59.08690643310547, 13.742427825927734, 44.31990051269531, 47.581295013427734, 85.42122650146484, 64.65162658691406, 7.043216705322266, 68.62112426757812, 91.15653991699219, -7.844768524169922, 76.53861999511719, 22.851806640625, 21.97027587890625, 74.24005126953125, 61.89789962768555, 20.309677124023438, 52.76093673706055, 1.3038787841796875, 34.00033187866211, 8.849113464355469, 96.96675109863281, 50.60406494140625, 44.06353759765625, 18.017480850219727, 7.226173400878906, 33.28820037841797, 36.384979248046875, 47.16654968261719, 51.840293884277344, 6.084751129150391, 42.932090759277344, 30.68090057373047, 32.77954864501953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000670.npy"} +{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 35.541099548339844, "std": 29.027013778686523, "min": -11.566856384277344, "p10": 5.84485378265381, "median": 28.887191772460938, "p90": 78.92247314453128, "max": 118.17950439453125, "pos_frac": 0.953125, "sample": [2.5947418212890625, 29.1798095703125, 15.152950286865234, 53.14019775390625, 7.203340530395508, 97.56037902832031, 42.544532775878906, 7.496894836425781, 12.929237365722656, 56.094451904296875, 35.0928955078125, 16.643516540527344, 56.46295928955078, 34.35710906982422, 9.14491081237793, 18.628890991210938, 25.45736312866211, 21.740028381347656, 58.45988464355469, 66.47625732421875, 6.893087387084961, 54.44269561767578, 2.189727783203125, 71.96492004394531, 91.60783386230469, 41.56550598144531, 81.90428161621094, 46.489295959472656, 60.22559356689453, 28.594573974609375, 5.395610809326172, 9.722419738769531, 8.507865905761719, 12.959129333496094, 57.27284240722656, 12.259147644042969, 70.05288696289062, 32.98192596435547, 51.99878692626953, 19.462112426757812, 60.186187744140625, -11.566856384277344, 26.102584838867188, 10.604904174804688, 57.212432861328125, 36.832672119140625, 26.867935180664062, 96.15293884277344, 3.6228256225585938, 21.928688049316406, 54.38056182861328, 29.46385955810547, -2.096323013305664, 22.14666748046875, 22.689910888671875, 11.67352294921875, -7.849952697753906, 7.125724792480469, 14.324043273925781, 89.6888656616211, 29.83782196044922, 118.17950439453125, 81.96585083007812, 42.3052978515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000671.npy"} +{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 32.39436721801758, "std": 30.078792572021484, "min": -13.848587036132812, "p10": -1.1314830780029297, "median": 27.38913917541504, "p90": 72.87146682739261, "max": 121.78811645507812, "pos_frac": 0.875, "sample": [23.90238380432129, 17.37903594970703, 10.213188171386719, 22.27912139892578, 3.3547935485839844, -1.0420417785644531, 41.29234313964844, 46.9290771484375, 94.24393463134766, 2.3310012817382812, 6.105068206787109, 21.34772491455078, -13.848587036132812, 33.30778503417969, 43.724449157714844, -3.6283493041992188, 26.873138427734375, -10.849937438964844, 34.125999450683594, 104.27831268310547, 27.905139923095703, 66.24553680419922, 1.7717666625976562, -1.1698150634765625, 48.00993347167969, 13.552162170410156, 75.71115112304688, 24.78310203552246, 32.349308013916016, 53.570091247558594, -2.8882217407226562, 95.62295532226562, 121.78811645507812, 37.883392333984375, 64.94479370117188, 24.875526428222656, 6.557857513427734, 30.760498046875, 37.944129943847656, 58.31439971923828, 30.157508850097656, 30.740066528320312, 10.774419784545898, 58.6895751953125, 23.60256576538086, 37.81224060058594, 59.120811462402344, 45.370025634765625, 14.412635803222656, 30.321231842041016, 4.650764465332031, 19.858291625976562, 4.6340789794921875, 23.26087188720703, 4.94780158996582, -9.048372268676758, 13.109443664550781, 44.645172119140625, 66.17115020751953, -3.899810791015625, 80.98126983642578, 63.03752136230469, 88.34211730957031, 10.698020935058594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000672.npy"} +{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 37.07730484008789, "std": 27.53151512145996, "min": -8.660566329956055, "p10": 2.995305252075197, "median": 34.42546081542969, "p90": 71.62473678588869, "max": 109.17060852050781, "pos_frac": 0.96875, "sample": [4.654762268066406, 2.202281951904297, 5.280109405517578, 41.130523681640625, 23.517494201660156, 109.17060852050781, 52.05754852294922, 20.269351959228516, 101.21280670166016, 52.94470977783203, 0.3701438903808594, 67.83567810058594, 24.43987274169922, 57.709197998046875, 4.391147613525391, 26.42333984375, -8.660566329956055, 69.35405731201172, 64.1878662109375, 26.480932235717773, 36.692893981933594, 51.76045227050781, 38.409454345703125, 2.3970870971679688, 64.3634033203125, 10.207286834716797, 17.773727416992188, 66.474609375, -1.1971397399902344, 16.217666625976562, 51.017452239990234, 64.32124328613281, 41.161277770996094, 43.310909271240234, 15.786905288696289, 93.35209655761719, 28.360275268554688, 31.112472534179688, 77.97929382324219, 34.57501220703125, 14.28335952758789, 12.485803604125977, 44.348304748535156, 67.45768737792969, 46.45231246948242, 2.0270767211914062, 19.185110092163086, 10.525968551635742, 34.275909423828125, 84.75740814208984, 39.08837890625, 23.641958236694336, 86.52991485595703, 62.51133728027344, 0.9727783203125, 37.3488883972168, 30.066253662109375, 45.4407958984375, 43.37552261352539, 10.538642883300781, 12.990684509277344, 21.077049255371094, 72.59788513183594, 21.92015266418457], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000673.npy"} +{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 37.874568939208984, "std": 28.332286834716797, "min": -0.8873023986816406, "p10": 3.8411582946777356, "median": 32.6145133972168, "p90": 74.58417053222657, "max": 115.23285675048828, "pos_frac": 0.984375, "sample": [36.64567184448242, 30.081851959228516, 50.066650390625, 21.113235473632812, 4.9066619873046875, 61.971900939941406, 23.350568771362305, 35.10845184326172, 2.5879383087158203, 73.46387481689453, 43.55833435058594, 82.14224243164062, 0.6626167297363281, 86.11378479003906, 17.451095581054688, 13.39442253112793, 10.370168685913086, 115.23285675048828, 17.370567321777344, 97.74995422363281, 25.341306686401367, 24.030879974365234, 1.992950439453125, 63.61957550048828, 46.086097717285156, 27.952587127685547, 58.54124450683594, 10.217727661132812, 49.17497253417969, 2.68975830078125, 70.29908752441406, 109.48989868164062, 5.309688568115234, 3.3845138549804688, 85.71268463134766, 74.91896057128906, 15.464715957641602, 17.02648162841797, 49.65889358520508, 71.6293716430664, 33.07441711425781, 23.05455780029297, 73.80299377441406, 17.570499420166016, 9.944047927856445, 26.278335571289062, 55.14764404296875, 45.429603576660156, 16.240646362304688, 43.522701263427734, 28.092979431152344, -0.8873023986816406, 68.51835632324219, 9.839115142822266, 49.2564697265625, 41.87071228027344, 50.82588195800781, 45.091331481933594, 26.715904235839844, 37.69990539550781, 42.8994140625, 0.1819629669189453, 32.15460968017578, 11.76336669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000674.npy"} +{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 39.495147705078125, "std": 27.726579666137695, "min": -28.165916442871094, "p10": 7.229411315917969, "median": 39.685787200927734, "p90": 78.8889892578125, "max": 101.28887939453125, "pos_frac": 0.921875, "sample": [18.383596420288086, 27.800670623779297, 36.75922393798828, 31.04807472229004, 36.311614990234375, 8.546283721923828, 82.66844177246094, -0.6773185729980469, 16.911518096923828, 50.73755645751953, 56.388832092285156, -20.955970764160156, 72.39464569091797, 19.17430877685547, 34.35882568359375, 4.300392150878906, 7.133491516113281, 79.27655029296875, 62.753196716308594, -28.165916442871094, 79.73149108886719, -3.3801803588867188, 101.28887939453125, 70.34870147705078, 36.97843933105469, 9.318456649780273, 12.361812591552734, 48.74936294555664, 41.83800506591797, 7.453224182128906, 48.04094696044922, 73.24330139160156, 61.003570556640625, 31.458251953125, 23.94532012939453, 70.19493103027344, 34.80600357055664, -11.302230834960938, 59.66808319091797, 77.98468017578125, 44.586700439453125, 38.46394348144531, 15.173080444335938, 53.651336669921875, 45.83015441894531, 14.156997680664062, 37.576263427734375, 79.85234832763672, 42.64425277709961, 40.907630920410156, 14.077394485473633, 52.681732177734375, 55.022193908691406, 31.596874237060547, 20.148536682128906, 26.07919692993164, 50.01662826538086, 11.665946960449219, 50.509979248046875, 85.34535217285156, 60.98206329345703, 57.24739074707031, 81.96304321289062, 48.631431579589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000675.npy"} +{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 36.297088623046875, "std": 28.963899612426758, "min": -24.548484802246094, "p10": 5.836563873291018, "median": 28.943811416625977, "p90": 72.48710861206057, "max": 99.427490234375, "pos_frac": 0.90625, "sample": [68.1841049194336, 90.55322265625, 41.275672912597656, 13.568092346191406, 64.42205810546875, 16.203109741210938, 46.839080810546875, 61.503395080566406, -1.8371696472167969, 10.881542205810547, -24.548484802246094, 36.03584289550781, 27.496421813964844, 14.74090576171875, 91.58201599121094, 64.0430908203125, 44.744815826416016, 48.10799789428711, 10.85788345336914, 17.85525131225586, 9.59918212890625, -1.1776809692382812, 42.68290710449219, 15.198883056640625, 20.447242736816406, 11.984306335449219, 23.82819366455078, 4.5814056396484375, 78.06678771972656, 61.12449645996094, 28.610671997070312, 59.85002136230469, 54.586204528808594, 47.914154052734375, 12.555267333984375, 16.248239517211914, 64.44070434570312, 56.799468994140625, 14.490447998046875, 99.427490234375, 34.6246337890625, 19.525588989257812, 51.6727294921875, 25.814022064208984, 40.42967224121094, -19.636932373046875, -6.543548583984375, 26.697479248046875, 8.765266418457031, 17.749725341796875, 92.49057006835938, 56.54522705078125, 66.67352294921875, 21.65473175048828, 13.531982421875, 53.40559387207031, 98.9020004272461, 16.985389709472656, 62.874332427978516, -1.3937149047851562, 29.27695083618164, 74.33125305175781, 15.65435791015625, 59.215423583984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000676.npy"} +{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 37.449485778808594, "std": 28.72178840637207, "min": -29.576866149902344, "p10": 1.5555427551269536, "median": 35.74952507019043, "p90": 73.69906234741212, "max": 108.10565185546875, "pos_frac": 0.921875, "sample": [-29.576866149902344, 108.10565185546875, 19.752647399902344, 17.8697509765625, 7.899894714355469, 58.590721130371094, -13.640913009643555, 57.20941925048828, 78.3707046508789, 9.550209045410156, 23.211477279663086, 52.76338195800781, 3.4155197143554688, 78.58045959472656, 1.356353759765625, 61.544166564941406, 67.33624267578125, 14.845596313476562, 13.387451171875, 29.596160888671875, 37.1556282043457, 47.71091842651367, 34.343421936035156, 29.846641540527344, 46.530052185058594, 21.75103759765625, 75.27977752685547, 31.876691818237305, 57.35185241699219, 64.500732421875, 16.57769203186035, 28.6207275390625, -10.638158798217773, 27.62042236328125, 69.0929946899414, 26.852264404296875, 80.36044311523438, 27.19563865661621, 38.54054260253906, 58.88105773925781, 82.64759063720703, 3.3980941772460938, 70.00003051757812, 47.54400634765625, 1.3342437744140625, 51.30829620361328, 24.314926147460938, 15.727666854858398, 88.2459716796875, 13.081283569335938, 54.23307800292969, -0.9860115051269531, -14.636672973632812, 37.66719055175781, 70.01072692871094, 54.1812858581543, 24.45587921142578, 65.19031524658203, 55.11150360107422, 67.5611572265625, 37.54730987548828, 2.0203170776367188, 28.97699737548828, 48.213600158691406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000677.npy"} +{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 36.70520782470703, "std": 31.234189987182617, "min": -9.92763900756836, "p10": -0.10983066558837695, "median": 31.58275604248047, "p90": 82.16436004638672, "max": 100.96841430664062, "pos_frac": 0.890625, "sample": [19.869173049926758, 100.96841430664062, 36.274688720703125, 58.66162109375, 56.10548400878906, 1.8298492431640625, 49.183631896972656, -1.0653076171875, 4.100074768066406, 21.05703353881836, 5.9805908203125, 49.56717300415039, 31.203842163085938, 80.62728881835938, 3.08282470703125, 45.02687072753906, 84.11221313476562, -6.017494201660156, 31.877471923828125, 57.18824005126953, 80.44661712646484, 21.439102172851562, -3.296293258666992, 26.1778564453125, 26.37810516357422, 95.59579467773438, 17.029739379882812, 10.59332275390625, -4.770336151123047, 82.82310485839844, 50.73457336425781, -0.9411220550537109, -4.954925537109375, 62.56263732910156, 9.960685729980469, 90.4603271484375, 31.288040161132812, 73.11354064941406, 5.0195159912109375, 57.781524658203125, 2.193756103515625, 41.19623565673828, 44.83659362792969, 66.1368637084961, 54.38887023925781, -9.92763900756836, 66.13827514648438, 77.44805908203125, 13.505775451660156, 71.36802673339844, 37.290618896484375, 3.380340576171875, 24.330364227294922, 3.5945777893066406, 4.549154281616211, 10.254045486450195, 7.059602737426758, 85.99565887451172, 41.678489685058594, 50.15412139892578, 68.03995513916016, 26.787601470947266, 92.75668334960938, 8.901893615722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000678.npy"} +{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 36.85191345214844, "std": 26.95528793334961, "min": -26.8460693359375, "p10": 9.05175895690918, "median": 33.74213218688965, "p90": 70.539266204834, "max": 109.41598510742188, "pos_frac": 0.953125, "sample": [77.56924438476562, 19.79096794128418, 27.154911041259766, 67.88973236083984, 49.878028869628906, 66.95743560791016, 30.562889099121094, 78.13243103027344, 62.03001403808594, 35.231689453125, 34.676361083984375, 69.43595123291016, 51.798683166503906, 12.153875350952148, 102.63409423828125, 19.076515197753906, 38.236358642578125, 24.599517822265625, 47.352745056152344, 31.006149291992188, 67.24642944335938, 104.83587646484375, 20.07647705078125, 14.074577331542969, 17.302989959716797, -26.8460693359375, 21.37706184387207, 33.54814910888672, 39.45634078979492, 16.9168643951416, 31.969444274902344, 15.10699462890625, -8.632316589355469, 64.21703338623047, 29.15985107421875, 19.03173065185547, 60.32373046875, 47.467559814453125, 6.908702850341797, 21.10700225830078, -6.05755615234375, 2.510528564453125, 31.175277709960938, 37.387550354003906, 73.10626983642578, 34.35181427001953, 13.779115676879883, 15.3800048828125, 13.63486099243164, 16.147247314453125, 56.78174591064453, 9.986923217773438, 8.65097427368164, 33.93611526489258, 36.376853942871094, 35.89271926879883, 71.01211547851562, 52.26691818237305, 48.870506286621094, 109.41598510742188, 43.056610107421875, 47.26969909667969, 26.445758819580078, 6.328245162963867], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000679.npy"} +{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 36.1793098449707, "std": 29.486133575439453, "min": -17.139549255371094, "p10": 3.3941310882568376, "median": 33.03257179260254, "p90": 65.80172119140624, "max": 125.35784912109375, "pos_frac": 0.921875, "sample": [53.2635498046875, -12.624980926513672, 2.7226295471191406, 44.09904479980469, 11.733108520507812, 31.419227600097656, 107.04313659667969, 49.45440673828125, 51.703834533691406, 18.78691864013672, 12.9140625, 35.49046325683594, -2.820392608642578, 49.36517333984375, 39.31114959716797, 105.65505981445312, 23.315387725830078, 2.107635498046875, -6.64617919921875, 7.436408996582031, 73.5899658203125, 38.65821075439453, 8.072528839111328, 19.613643646240234, 23.224769592285156, 65.80770874023438, 32.92787551879883, 22.42653465270996, 29.819190979003906, 42.364013671875, 10.228080749511719, 62.06920623779297, 39.111907958984375, 51.0636100769043, 45.138729095458984, 39.12739562988281, 12.153617858886719, 47.744476318359375, 46.95960998535156, 10.42523193359375, 61.28736877441406, 107.57485961914062, 50.67768859863281, 28.580650329589844, 31.041561126708984, -17.139549255371094, 65.78775024414062, 16.4805908203125, 16.970417022705078, 92.73706817626953, -2.7035980224609375, 125.35784912109375, 6.5805816650390625, 17.30945587158203, 63.68061828613281, 33.13726806640625, 56.659217834472656, 52.289215087890625, 20.34878158569336, 23.07733154296875, 4.960968017578125, 28.86908721923828, 46.34967041015625, 41.305084228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000680.npy"} +{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 31.793434143066406, "std": 28.625511169433594, "min": -20.932456970214844, "p10": 1.5619449615478525, "median": 26.756820678710938, "p90": 63.89557571411134, "max": 120.13215637207031, "pos_frac": 0.90625, "sample": [10.313575744628906, -6.749244689941406, 65.24208068847656, 20.456085205078125, 60.56085205078125, 26.990196228027344, 55.158233642578125, 20.531536102294922, 23.631507873535156, 22.798564910888672, 7.05133056640625, 33.61895751953125, -12.778783798217773, 12.070281982421875, -0.13623046875, 120.13215637207031, 36.47681427001953, 10.929222106933594, 26.840309143066406, 48.58708190917969, 60.75373077392578, 2.9954757690429688, 12.139198303222656, 34.281951904296875, 15.641677856445312, 58.13347625732422, 18.840599060058594, 91.67431640625, 26.67333221435547, 32.817665100097656, 1.1368484497070312, 32.51275634765625, 34.546539306640625, 7.905494689941406, 21.645278930664062, 21.72612762451172, -0.6629962921142578, 6.2172393798828125, 33.49455261230469, -7.454242706298828, 18.37664794921875, 28.99622344970703, 37.992835998535156, 46.923301696777344, -20.932456970214844, 27.238853454589844, 5.33476448059082, 10.783348083496094, 16.063552856445312, 16.743179321289062, 72.52767944335938, 58.78449630737305, 76.39740753173828, 44.677268981933594, 21.862258911132812, 34.84241485595703, 60.34978485107422, 110.8502426147461, 54.25399398803711, 56.116783142089844, 2.5538368225097656, 60.192901611328125, 91.8756332397461, 15.231307983398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000681.npy"} diff --git a/margin_logs/step_0000001.npy b/margin_logs/step_0000001.npy new file mode 100644 index 0000000..248c095 --- /dev/null +++ b/margin_logs/step_0000001.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ed5e9b5d6de6c4e509dd17cf5d9c91337fabd0c174e116c5e60872823ad93 +size 384 diff --git a/margin_logs/step_0000002.npy b/margin_logs/step_0000002.npy new file mode 100644 index 0000000..984e4c2 --- /dev/null +++ b/margin_logs/step_0000002.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc23171824afa57340cda53f69d83aef67c7c0b95175e9ec4a3a7bc3c221bc4f +size 384 diff --git a/margin_logs/step_0000003.npy b/margin_logs/step_0000003.npy new file mode 100644 index 0000000..056a5fa --- /dev/null +++ b/margin_logs/step_0000003.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418e6e5cd92b10bf1e28d8bceb3f89136be6fe8f275c997a0c8557d2873b867c +size 384 diff --git a/margin_logs/step_0000004.npy b/margin_logs/step_0000004.npy new file mode 100644 index 0000000..dbb9388 --- /dev/null +++ b/margin_logs/step_0000004.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ed6bb4a7844a126b51431c33e406886e7685352183bc1236e54476f8e81e45 +size 384 diff --git a/margin_logs/step_0000005.npy b/margin_logs/step_0000005.npy new file mode 100644 index 0000000..379d03f --- /dev/null +++ b/margin_logs/step_0000005.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6fab991d9e2b36765123ce28b66e0aff1939971a1fbc77b4b535d9f2606265 +size 384 diff --git a/margin_logs/step_0000006.npy b/margin_logs/step_0000006.npy new file mode 100644 index 0000000..afc2c5d --- /dev/null +++ b/margin_logs/step_0000006.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88a8c8cbaf88e5e9e925abbe8f458eeb677cfdb91c4a8040e8280f8b1a73e1e4 +size 384 diff --git a/margin_logs/step_0000007.npy b/margin_logs/step_0000007.npy new file mode 100644 index 0000000..bf90a80 --- /dev/null +++ b/margin_logs/step_0000007.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5fe54aa252627bc2c3db43245db163434ac3d9f6b24867de2389e23bca1d49 +size 384 diff --git a/margin_logs/step_0000008.npy b/margin_logs/step_0000008.npy new file mode 100644 index 0000000..45dd27a --- /dev/null +++ b/margin_logs/step_0000008.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4714a227cd4f102d7808340946760cf0ff64391a2e75d255153033ef5be28915 +size 384 diff --git a/margin_logs/step_0000009.npy b/margin_logs/step_0000009.npy new file mode 100644 index 0000000..c9eab4f --- /dev/null +++ b/margin_logs/step_0000009.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82a9cf3b893b43432f661492c4459d0429ed1b0f7cf623c0c0a9f2d846fd9fd +size 384 diff --git a/margin_logs/step_0000010.npy b/margin_logs/step_0000010.npy new file mode 100644 index 0000000..e4283af --- /dev/null +++ b/margin_logs/step_0000010.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d456afeca84a94f62603b74ea02c037956d8b7068a14e468816d36c05404883c +size 384 diff --git a/margin_logs/step_0000011.npy b/margin_logs/step_0000011.npy new file mode 100644 index 0000000..db1ea59 --- /dev/null +++ b/margin_logs/step_0000011.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199f9ec94a2c5c377635a08dd749e8b0f949a11cb5d15c14d64739bcf0934d96 +size 384 diff --git a/margin_logs/step_0000012.npy b/margin_logs/step_0000012.npy new file mode 100644 index 0000000..cb2eb14 --- /dev/null +++ b/margin_logs/step_0000012.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c24691574d467856047c0a8ad7decdc301d26606f323af1f78651e79e607af +size 384 diff --git a/margin_logs/step_0000013.npy b/margin_logs/step_0000013.npy new file mode 100644 index 0000000..e2c40c5 --- /dev/null +++ b/margin_logs/step_0000013.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72937956dbbdaa55ecaa5605c0860f9c750fe42f97e2f516a9dedc9bdae2dda8 +size 384 diff --git a/margin_logs/step_0000014.npy b/margin_logs/step_0000014.npy new file mode 100644 index 0000000..832924b --- /dev/null +++ b/margin_logs/step_0000014.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7080447ce95649fa92d438625cc59c676dd59dc94700bac95fe8af064a3f536 +size 384 diff --git a/margin_logs/step_0000015.npy b/margin_logs/step_0000015.npy new file mode 100644 index 0000000..e8b8b1d --- /dev/null +++ b/margin_logs/step_0000015.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7681d66b0e323d7f07dade92eb28d716b54048776c7df3ba3ff8a18cecc54bfd +size 384 diff --git a/margin_logs/step_0000016.npy b/margin_logs/step_0000016.npy new file mode 100644 index 0000000..f0c7389 --- /dev/null +++ b/margin_logs/step_0000016.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a9d771e11e59c5b75562ab4fc867d38fd6826461e5ada6c3719d7698b6dc86 +size 384 diff --git a/margin_logs/step_0000017.npy b/margin_logs/step_0000017.npy new file mode 100644 index 0000000..cb0809f --- /dev/null +++ b/margin_logs/step_0000017.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03c92d62e4f79ecedf79884da9f1309796299cdb6e3ced8544b9591ac30cef31 +size 384 diff --git a/margin_logs/step_0000018.npy b/margin_logs/step_0000018.npy new file mode 100644 index 0000000..5536ffd --- /dev/null +++ b/margin_logs/step_0000018.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66bc270923a1e6911822af7fc37484bade25a737bdadc44865bf8af06e4ad258 +size 384 diff --git a/margin_logs/step_0000019.npy b/margin_logs/step_0000019.npy new file mode 100644 index 0000000..16e8820 --- /dev/null +++ b/margin_logs/step_0000019.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c05409f40d5b29a0ccf579e53cc841764bca622cf0fdaab54f78d874ab73e4c3 +size 384 diff --git a/margin_logs/step_0000020.npy b/margin_logs/step_0000020.npy new file mode 100644 index 0000000..61160a6 --- /dev/null +++ b/margin_logs/step_0000020.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8256bb663b8d4b542929d528b7572e44cdbc493bc37a8a6d8902d9715795c913 +size 384 diff --git a/margin_logs/step_0000021.npy b/margin_logs/step_0000021.npy new file mode 100644 index 0000000..bcbfb39 --- /dev/null +++ b/margin_logs/step_0000021.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97c9e18a33d8f474d5805db7e17acff7e9924d9406890d20030286ffb915df8 +size 384 diff --git a/margin_logs/step_0000022.npy b/margin_logs/step_0000022.npy new file mode 100644 index 0000000..4fd8f5b --- /dev/null +++ b/margin_logs/step_0000022.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae020dce6c771df75a779bd24d43386b24f201cc3e6a7d8806728875a72a1a26 +size 384 diff --git a/margin_logs/step_0000023.npy b/margin_logs/step_0000023.npy new file mode 100644 index 0000000..36ff825 --- /dev/null +++ b/margin_logs/step_0000023.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c19394d93e146ef88ba16c758c94561384d5ab3d02d9fb55e318a89b297978 +size 384 diff --git a/margin_logs/step_0000024.npy b/margin_logs/step_0000024.npy new file mode 100644 index 0000000..054a102 --- /dev/null +++ b/margin_logs/step_0000024.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fb173472c0ff93637dc8c97042f0b4fee6e50d6d643146fc227574aa2f5781 +size 384 diff --git a/margin_logs/step_0000025.npy b/margin_logs/step_0000025.npy new file mode 100644 index 0000000..e5c5e29 --- /dev/null +++ b/margin_logs/step_0000025.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab79589b2b1a645e1675bd560f9ade1af90e446c0a8cda11fee74b6ec4c9210a +size 384 diff --git a/margin_logs/step_0000026.npy b/margin_logs/step_0000026.npy new file mode 100644 index 0000000..30a8c32 --- /dev/null +++ b/margin_logs/step_0000026.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e8d5d0387bda699d24814909a36c3bbad000462bc4dd6030af5fd32fc1333aa +size 384 diff --git a/margin_logs/step_0000027.npy b/margin_logs/step_0000027.npy new file mode 100644 index 0000000..39dbe17 --- /dev/null +++ b/margin_logs/step_0000027.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf0ec01686a0b5618929ea88696e38a387f1bfaf8a41c9fae09d0c9ebe872d8 +size 384 diff --git a/margin_logs/step_0000028.npy b/margin_logs/step_0000028.npy new file mode 100644 index 0000000..c73cc6e --- /dev/null +++ b/margin_logs/step_0000028.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c2ccce8127e043f3d88322e3ec9500fb8d682c5ed0224e621ac9cfedfe5ef7 +size 384 diff --git a/margin_logs/step_0000029.npy b/margin_logs/step_0000029.npy new file mode 100644 index 0000000..ae269fc --- /dev/null +++ b/margin_logs/step_0000029.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7e6e7319248b9a465f5e0ca5d55062a190e144e1ee9c3c3905df8a6e9d92ef +size 384 diff --git a/margin_logs/step_0000030.npy b/margin_logs/step_0000030.npy new file mode 100644 index 0000000..d7104a3 --- /dev/null +++ b/margin_logs/step_0000030.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5b00d051f1b2976f7f8dda38985592dbfa40e4305ae022fd02c6cad952e2bc +size 384 diff --git a/margin_logs/step_0000031.npy b/margin_logs/step_0000031.npy new file mode 100644 index 0000000..b4d8abe --- /dev/null +++ b/margin_logs/step_0000031.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fc7fe78e407bf09d34f4ccc3a8de33461189e6879fd801642f9eb8b179002b +size 384 diff --git a/margin_logs/step_0000032.npy b/margin_logs/step_0000032.npy new file mode 100644 index 0000000..283f362 --- /dev/null +++ b/margin_logs/step_0000032.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e0bc69de67eb5fd7a41aaad7a827a7b8d424207709cb0959eb2d51464bc903 +size 384 diff --git a/margin_logs/step_0000033.npy b/margin_logs/step_0000033.npy new file mode 100644 index 0000000..abfac15 --- /dev/null +++ b/margin_logs/step_0000033.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a97a65c12438c527404402dab62cb5080aff1456bc9054ba69be8a296904dc9 +size 384 diff --git a/margin_logs/step_0000034.npy b/margin_logs/step_0000034.npy new file mode 100644 index 0000000..e3176f9 --- /dev/null +++ b/margin_logs/step_0000034.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f82ffb6cfc0675c80a69eec135bb2befebc52263d7b2b1f8347112530d3ba0 +size 384 diff --git a/margin_logs/step_0000035.npy b/margin_logs/step_0000035.npy new file mode 100644 index 0000000..e698ae6 --- /dev/null +++ b/margin_logs/step_0000035.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48de792eba8568601d7f036ca338cc7c08ad62ee98e970e012bbf7a909f7c2ed +size 384 diff --git a/margin_logs/step_0000036.npy b/margin_logs/step_0000036.npy new file mode 100644 index 0000000..9bf09a1 --- /dev/null +++ b/margin_logs/step_0000036.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1db1ed5ae92cfecba6eaa2096b2ebecc24a8c8d8b849dcded461fbba74ac76 +size 384 diff --git a/margin_logs/step_0000037.npy b/margin_logs/step_0000037.npy new file mode 100644 index 0000000..874e90f --- /dev/null +++ b/margin_logs/step_0000037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38176df5b216f9d124758b05402a7fd7f624adec39c03946d75b57b78f1210f7 +size 384 diff --git a/margin_logs/step_0000038.npy b/margin_logs/step_0000038.npy new file mode 100644 index 0000000..7df5e0a --- /dev/null +++ b/margin_logs/step_0000038.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767e4094d76ea6a6d528eb91bd1174f52530237ac0c4ce1982e478b9823b7c05 +size 384 diff --git a/margin_logs/step_0000039.npy b/margin_logs/step_0000039.npy new file mode 100644 index 0000000..d803eeb --- /dev/null +++ b/margin_logs/step_0000039.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a39013df5a8b86852278385a1319bfb5c1ae548c76ff49e246ab1c0b6175649 +size 384 diff --git a/margin_logs/step_0000040.npy b/margin_logs/step_0000040.npy new file mode 100644 index 0000000..64c9be2 --- /dev/null +++ b/margin_logs/step_0000040.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7eee3beab33d3d69bc05f40729d762cfb828b239f6199907d040fd6137b7c2 +size 384 diff --git a/margin_logs/step_0000041.npy b/margin_logs/step_0000041.npy new file mode 100644 index 0000000..41f52ce --- /dev/null +++ b/margin_logs/step_0000041.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b65be02fd4e513be08642c272b4775900aaf89a58d57e02072da9d8fbf985d +size 384 diff --git a/margin_logs/step_0000042.npy b/margin_logs/step_0000042.npy new file mode 100644 index 0000000..8ec4653 --- /dev/null +++ b/margin_logs/step_0000042.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d2a0401444c25a3e298d9dda508c5410f4c8a9d62f8ed656e4b75a0a7cf78a +size 384 diff --git a/margin_logs/step_0000043.npy b/margin_logs/step_0000043.npy new file mode 100644 index 0000000..f54cc00 --- /dev/null +++ b/margin_logs/step_0000043.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9592c384649a765e93b5a118ede27f7e44ee9fda957f994d2c2303eb3a58c8c6 +size 384 diff --git a/margin_logs/step_0000044.npy b/margin_logs/step_0000044.npy new file mode 100644 index 0000000..d7707dd --- /dev/null +++ b/margin_logs/step_0000044.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653f0d290589610237940b856c40b86db0921173e5bf216f57dd126881bb546a +size 384 diff --git a/margin_logs/step_0000045.npy b/margin_logs/step_0000045.npy new file mode 100644 index 0000000..64029ff --- /dev/null +++ b/margin_logs/step_0000045.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8e5730714125f9d462a824e6f6f5b723bf674337c5c8eb7b40ac59441ae2427 +size 384 diff --git a/margin_logs/step_0000046.npy b/margin_logs/step_0000046.npy new file mode 100644 index 0000000..a42964a --- /dev/null +++ b/margin_logs/step_0000046.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347775cf9fed9c0f2b54dc3c26b93c035b052eaf57a296b4b34990c16d0e4a51 +size 384 diff --git a/margin_logs/step_0000047.npy b/margin_logs/step_0000047.npy new file mode 100644 index 0000000..17e32bb --- /dev/null +++ b/margin_logs/step_0000047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412f933226c6154a8b2fa35df683ac962f2c4088d8a17c141708d162ec9be9c4 +size 384 diff --git a/margin_logs/step_0000048.npy b/margin_logs/step_0000048.npy new file mode 100644 index 0000000..8994351 --- /dev/null +++ b/margin_logs/step_0000048.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e266d4ebf95e33b80307a2bfae6d83c92e97392d5da27179e43c0c00ebf4846 +size 384 diff --git a/margin_logs/step_0000049.npy b/margin_logs/step_0000049.npy new file mode 100644 index 0000000..00c7d11 --- /dev/null +++ b/margin_logs/step_0000049.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6960ceb88911e3db2df8f87f81457f9fe5a3a717ffaab0ef88f7e1c2a8c10ec6 +size 384 diff --git a/margin_logs/step_0000050.npy b/margin_logs/step_0000050.npy new file mode 100644 index 0000000..836a472 --- /dev/null +++ b/margin_logs/step_0000050.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf71ee2dc6d886431aacb47396220a1a4badec0840a7a8a7ac9287c3ceb6ff3 +size 384 diff --git a/margin_logs/step_0000051.npy b/margin_logs/step_0000051.npy new file mode 100644 index 0000000..9b89e9e --- /dev/null +++ b/margin_logs/step_0000051.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062bb0217bbdd717befd5d7076fe59d49cfb3a67b0051a50bae379710019b1f9 +size 384 diff --git a/margin_logs/step_0000052.npy b/margin_logs/step_0000052.npy new file mode 100644 index 0000000..6d8baa4 --- /dev/null +++ b/margin_logs/step_0000052.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8128ff434d8f57e4072030bff64cd06b28ce4134c9f7df10bcdaa0bd9cb5807 +size 384 diff --git a/margin_logs/step_0000053.npy b/margin_logs/step_0000053.npy new file mode 100644 index 0000000..69c0143 --- /dev/null +++ b/margin_logs/step_0000053.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b32c575d120c7d4732762c65e30e010e14f90b867aa9653ec90acd7d883616 +size 384 diff --git a/margin_logs/step_0000054.npy b/margin_logs/step_0000054.npy new file mode 100644 index 0000000..5f9943e --- /dev/null +++ b/margin_logs/step_0000054.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80a24e1d0561d8e216a5c15b4e4e569b570d3c3fbc008d4f490c9668dc1df5e7 +size 384 diff --git a/margin_logs/step_0000055.npy b/margin_logs/step_0000055.npy new file mode 100644 index 0000000..b3672eb --- /dev/null +++ b/margin_logs/step_0000055.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477394d277998bae383883575bae24c5382f4b37538364134b87810f092f7580 +size 384 diff --git a/margin_logs/step_0000056.npy b/margin_logs/step_0000056.npy new file mode 100644 index 0000000..6d27156 --- /dev/null +++ b/margin_logs/step_0000056.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a660fc4d0eb0e14f8e9672573fba2ec3abbe2a6fb493e807d9f6cffe5c9cfd +size 384 diff --git a/margin_logs/step_0000057.npy b/margin_logs/step_0000057.npy new file mode 100644 index 0000000..667c0c2 --- /dev/null +++ b/margin_logs/step_0000057.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3aefeeccfa2e7da22df78d0ed827cf74c7bd0f49a201e8cf8d513888104d7bd +size 384 diff --git a/margin_logs/step_0000058.npy b/margin_logs/step_0000058.npy new file mode 100644 index 0000000..ee6bc58 --- /dev/null +++ b/margin_logs/step_0000058.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b91ab7490f846596fda8cbc0cbf7ac93e6088c10fc7ea5d3cb7ea5ec1d77d2 +size 384 diff --git a/margin_logs/step_0000059.npy b/margin_logs/step_0000059.npy new file mode 100644 index 0000000..f572077 --- /dev/null +++ b/margin_logs/step_0000059.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee01f3455b42fc4f4d83d8ded92f14d2a3af1cd0891db1a797f46dc5752c7f10 +size 384 diff --git a/margin_logs/step_0000060.npy b/margin_logs/step_0000060.npy new file mode 100644 index 0000000..11844ac --- /dev/null +++ b/margin_logs/step_0000060.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e087e663cdbbeb3afd71e93b46a6b32635c92f7c6dd56cb3ed987ab39d49d3 +size 384 diff --git a/margin_logs/step_0000061.npy b/margin_logs/step_0000061.npy new file mode 100644 index 0000000..a7e2aa8 --- /dev/null +++ b/margin_logs/step_0000061.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a023c3b5d24b2e50161b77b3bd021954222439f2eef66a6a3c5f2d72e06f77c +size 384 diff --git a/margin_logs/step_0000062.npy b/margin_logs/step_0000062.npy new file mode 100644 index 0000000..a17bb15 --- /dev/null +++ b/margin_logs/step_0000062.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49a63f9dd5d46cbf35dbaf7120c579809ed507b022ffc83ad5f688b53e82d94 +size 384 diff --git a/margin_logs/step_0000063.npy b/margin_logs/step_0000063.npy new file mode 100644 index 0000000..229b87b --- /dev/null +++ b/margin_logs/step_0000063.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68258693e286a8cd5aa8f896b10e0c53307d801b04c2ba4556da096e47e809d3 +size 384 diff --git a/margin_logs/step_0000064.npy b/margin_logs/step_0000064.npy new file mode 100644 index 0000000..6c0f8f1 --- /dev/null +++ b/margin_logs/step_0000064.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe509ac869ce100f89e846950ef1d493f3db87cdf419a37b4bd5cdbf4f96ced +size 384 diff --git a/margin_logs/step_0000065.npy b/margin_logs/step_0000065.npy new file mode 100644 index 0000000..4130503 --- /dev/null +++ b/margin_logs/step_0000065.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b2a0027513ac8059e1b2c88d2819a5ab08cdf96662a84556a7ec7f63cca0f03 +size 384 diff --git a/margin_logs/step_0000066.npy b/margin_logs/step_0000066.npy new file mode 100644 index 0000000..f7142a5 --- /dev/null +++ b/margin_logs/step_0000066.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e81aabad2580c10f07b0bf9df99b77b30022e1f6c8b29bed2f8f5e654afad16 +size 384 diff --git a/margin_logs/step_0000067.npy b/margin_logs/step_0000067.npy new file mode 100644 index 0000000..3062fda --- /dev/null +++ b/margin_logs/step_0000067.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59b972703f3e9e2fbe1e6faf9722e4fca81168b823cbb1a5e08e130a125aaab +size 384 diff --git a/margin_logs/step_0000068.npy b/margin_logs/step_0000068.npy new file mode 100644 index 0000000..3ed4ddc --- /dev/null +++ b/margin_logs/step_0000068.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e63eb26cbb61628119a9df337b21a14c2bd3d13499a8e2caebd2f289dd56d98 +size 384 diff --git a/margin_logs/step_0000069.npy b/margin_logs/step_0000069.npy new file mode 100644 index 0000000..f8f0453 --- /dev/null +++ b/margin_logs/step_0000069.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e7cf5652df0fd82c0401d94e3ba9206b7c880cf5f805bb2f2e33023237c9a4f +size 384 diff --git a/margin_logs/step_0000070.npy b/margin_logs/step_0000070.npy new file mode 100644 index 0000000..e33ce5d --- /dev/null +++ b/margin_logs/step_0000070.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a3c62dd104d1ff123f7a6f688909535c13542d1ed665f0e40afb1f70bf1e01 +size 384 diff --git a/margin_logs/step_0000071.npy b/margin_logs/step_0000071.npy new file mode 100644 index 0000000..b03d088 --- /dev/null +++ b/margin_logs/step_0000071.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6073233e746b4bcc0b179aaaef42ade97b3f928b822007df56def6c4c5ae1f3c +size 384 diff --git a/margin_logs/step_0000072.npy b/margin_logs/step_0000072.npy new file mode 100644 index 0000000..5d78b1d --- /dev/null +++ b/margin_logs/step_0000072.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f5123e762f59b5a7b90ece98dc0075e95a386f48f293e1dab5a1e187f1adc5 +size 384 diff --git a/margin_logs/step_0000073.npy b/margin_logs/step_0000073.npy new file mode 100644 index 0000000..6ed7786 --- /dev/null +++ b/margin_logs/step_0000073.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71926f982c35f83356f8724c6d2ce65a14116cbcbc75f85af7f27f5cfe721f85 +size 384 diff --git a/margin_logs/step_0000074.npy b/margin_logs/step_0000074.npy new file mode 100644 index 0000000..3369b49 --- /dev/null +++ b/margin_logs/step_0000074.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d996e909c5409b3b3334bf536c2f541a532830beecea0d25d8e2d0cbe2633a3b +size 384 diff --git a/margin_logs/step_0000075.npy b/margin_logs/step_0000075.npy new file mode 100644 index 0000000..eb75f2b --- /dev/null +++ b/margin_logs/step_0000075.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db0bf449af1a0b02b3c5a929182df8bda73aa3e8c7afbb391cc80e69fe12750c +size 384 diff --git a/margin_logs/step_0000076.npy b/margin_logs/step_0000076.npy new file mode 100644 index 0000000..b20c493 --- /dev/null +++ b/margin_logs/step_0000076.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc52b2a724e54528c416897f3d52841e970370f718bfbfca39391b3e95c08e1d +size 384 diff --git a/margin_logs/step_0000077.npy b/margin_logs/step_0000077.npy new file mode 100644 index 0000000..17f25a3 --- /dev/null +++ b/margin_logs/step_0000077.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2b8baa402c82be029fe8b41c3076132c9a5c6f0b2bb5090a23b9f0fda59929 +size 384 diff --git a/margin_logs/step_0000078.npy b/margin_logs/step_0000078.npy new file mode 100644 index 0000000..a5b1c3d --- /dev/null +++ b/margin_logs/step_0000078.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19fb2b5785b6426a331b8cdcda9d6810d7de0fa379294a4b7b3e9f6a0eca8e54 +size 384 diff --git a/margin_logs/step_0000079.npy b/margin_logs/step_0000079.npy new file mode 100644 index 0000000..6ae066c --- /dev/null +++ b/margin_logs/step_0000079.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39867b10525052de1803b765a47fd812e55391142c088216eafa749524c279b9 +size 384 diff --git a/margin_logs/step_0000080.npy b/margin_logs/step_0000080.npy new file mode 100644 index 0000000..766ffde --- /dev/null +++ b/margin_logs/step_0000080.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea074479653ad2678a286c5c3907d3cef6424135ae662a0ed7b9a3a15f507ce +size 384 diff --git a/margin_logs/step_0000081.npy b/margin_logs/step_0000081.npy new file mode 100644 index 0000000..21b3f0b --- /dev/null +++ b/margin_logs/step_0000081.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9759ee731045fa28e5f3bcdbb099c46c345e5567ff82bb6ef2824f9dce2b7166 +size 384 diff --git a/margin_logs/step_0000082.npy b/margin_logs/step_0000082.npy new file mode 100644 index 0000000..9f20763 --- /dev/null +++ b/margin_logs/step_0000082.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e668fd72c25c69e8ce776ca007abaf55c1816c9cc12ce3245150a066daadea +size 384 diff --git a/margin_logs/step_0000083.npy b/margin_logs/step_0000083.npy new file mode 100644 index 0000000..50ee9f6 --- /dev/null +++ b/margin_logs/step_0000083.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afb8d07b2fda370d7a20f1f14df2d4eef811b88bf3cf16c5be0be7fb6b36a64 +size 384 diff --git a/margin_logs/step_0000084.npy b/margin_logs/step_0000084.npy new file mode 100644 index 0000000..3269eff --- /dev/null +++ b/margin_logs/step_0000084.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3da3b17206141aa6f047236b67a9a2f305add761de04b81d024e6c42d4731f6 +size 384 diff --git a/margin_logs/step_0000085.npy b/margin_logs/step_0000085.npy new file mode 100644 index 0000000..b864ef0 --- /dev/null +++ b/margin_logs/step_0000085.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e64812afddb25c6ee00621bcd18f64ea0c68cc7189a0c6d98265a6f7763069 +size 384 diff --git a/margin_logs/step_0000086.npy b/margin_logs/step_0000086.npy new file mode 100644 index 0000000..e890829 --- /dev/null +++ b/margin_logs/step_0000086.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc091b3d28411be2fda41e052e3b10cdcb7a533ac6a401c577d50d4672a9082 +size 384 diff --git a/margin_logs/step_0000087.npy b/margin_logs/step_0000087.npy new file mode 100644 index 0000000..fee679f --- /dev/null +++ b/margin_logs/step_0000087.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c6a549826f27829bd326c936ba6e4e37dba6aa00a5b29cb9e8285ac5f41ffb +size 384 diff --git a/margin_logs/step_0000088.npy b/margin_logs/step_0000088.npy new file mode 100644 index 0000000..2e7da56 --- /dev/null +++ b/margin_logs/step_0000088.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6bcc1b1e9c9d9f388bf4044f24690876d2612d97b172e327714e203e6e786ca +size 384 diff --git a/margin_logs/step_0000089.npy b/margin_logs/step_0000089.npy new file mode 100644 index 0000000..d09637e --- /dev/null +++ b/margin_logs/step_0000089.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4a05b76d1bc62ead96fc765d3c70c9f783bb8bd05dcc380a26cf3345de7e381 +size 384 diff --git a/margin_logs/step_0000090.npy b/margin_logs/step_0000090.npy new file mode 100644 index 0000000..c910004 --- /dev/null +++ b/margin_logs/step_0000090.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d7a7b5132a66475c7b290b67d2bec2633d57397d28b8bfecf28e4d8dd1352a +size 384 diff --git a/margin_logs/step_0000091.npy b/margin_logs/step_0000091.npy new file mode 100644 index 0000000..0cb5caa --- /dev/null +++ b/margin_logs/step_0000091.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea60c76cd31d9223457b388253983e672637af560ce4b7344d9fa55916339cb +size 384 diff --git a/margin_logs/step_0000092.npy b/margin_logs/step_0000092.npy new file mode 100644 index 0000000..5bc7ff8 --- /dev/null +++ b/margin_logs/step_0000092.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b4a29e944c2cac68ef362e0aa1db9d097f636dc2907153fc236ecdfa7f72cc +size 384 diff --git a/margin_logs/step_0000093.npy b/margin_logs/step_0000093.npy new file mode 100644 index 0000000..a01a7f1 --- /dev/null +++ b/margin_logs/step_0000093.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7f29d843825a73fa56c801cd13b81523d86a8e700ef5ed67982894979a3249 +size 384 diff --git a/margin_logs/step_0000094.npy b/margin_logs/step_0000094.npy new file mode 100644 index 0000000..d5bf5a1 --- /dev/null +++ b/margin_logs/step_0000094.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab01cc3337bf48ccd98474a9ab4ee349d844bd8f6dee24c204e08b99cdc1d19 +size 384 diff --git a/margin_logs/step_0000095.npy b/margin_logs/step_0000095.npy new file mode 100644 index 0000000..806652c --- /dev/null +++ b/margin_logs/step_0000095.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a69f08db00408ddcab684188a99c2fba3dd6f44f5211867b8f6d40ec7e6e971 +size 384 diff --git a/margin_logs/step_0000096.npy b/margin_logs/step_0000096.npy new file mode 100644 index 0000000..41c1299 --- /dev/null +++ b/margin_logs/step_0000096.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a17c36f27f066b5a44518cb6f20a8ea096b3381ac67818b64220d1dc22f4fa +size 384 diff --git a/margin_logs/step_0000097.npy b/margin_logs/step_0000097.npy new file mode 100644 index 0000000..5b04763 --- /dev/null +++ b/margin_logs/step_0000097.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c67298820c9b40caa91b0079eb1d49bbb835ad4cd549b9849e57a15b14daa2fb +size 384 diff --git a/margin_logs/step_0000098.npy b/margin_logs/step_0000098.npy new file mode 100644 index 0000000..0e7bf29 --- /dev/null +++ b/margin_logs/step_0000098.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6455aac7680082a0aa522919c9bb6ba2b287e5e274b487e59ed9add77b9661af +size 384 diff --git a/margin_logs/step_0000099.npy b/margin_logs/step_0000099.npy new file mode 100644 index 0000000..425a1df --- /dev/null +++ b/margin_logs/step_0000099.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57e30fb6b9f112350a6b5b35aa871a733211d5912bd670b74ebf1d8a967a3c7 +size 384 diff --git a/margin_logs/step_0000100.npy b/margin_logs/step_0000100.npy new file mode 100644 index 0000000..9f076f9 --- /dev/null +++ b/margin_logs/step_0000100.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b242623b93cca3bfccabb7ad7924f1faa20165a554cae5223a5984410dbc703e +size 384 diff --git a/margin_logs/step_0000101.npy b/margin_logs/step_0000101.npy new file mode 100644 index 0000000..1c52063 --- /dev/null +++ b/margin_logs/step_0000101.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86a90db6e790c66e43acb108d8cba6b81b7f9491e1fe22673f97ca2a0d9c727 +size 384 diff --git a/margin_logs/step_0000102.npy b/margin_logs/step_0000102.npy new file mode 100644 index 0000000..7317cfc --- /dev/null +++ b/margin_logs/step_0000102.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8944a24f0c83456598af1106b18add4e4d0aa94b95a00ca1242b5d60284e82db +size 384 diff --git a/margin_logs/step_0000103.npy b/margin_logs/step_0000103.npy new file mode 100644 index 0000000..2329a08 --- /dev/null +++ b/margin_logs/step_0000103.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edda6ee95ef7a52a378b32b2a225119fd566dad08ae142f5742a0777d673980e +size 384 diff --git a/margin_logs/step_0000104.npy b/margin_logs/step_0000104.npy new file mode 100644 index 0000000..a69a541 --- /dev/null +++ b/margin_logs/step_0000104.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ecc79ee977e5d41ff850078883038b9fc68f0757e97a679fca7b197e3ee857 +size 384 diff --git a/margin_logs/step_0000105.npy b/margin_logs/step_0000105.npy new file mode 100644 index 0000000..7ef4cfb --- /dev/null +++ b/margin_logs/step_0000105.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56719f852331ba0a6925d955e154e23c7e23491e18311a1a65468075442e153 +size 384 diff --git a/margin_logs/step_0000106.npy b/margin_logs/step_0000106.npy new file mode 100644 index 0000000..2db7ba1 --- /dev/null +++ b/margin_logs/step_0000106.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399b29f354c885daeaa6850659156451c50961e0fcb9714a538726e1514e7877 +size 384 diff --git a/margin_logs/step_0000107.npy b/margin_logs/step_0000107.npy new file mode 100644 index 0000000..ad8c9e2 --- /dev/null +++ b/margin_logs/step_0000107.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b730319124b8dc3e3aa99ec647670793a630b6a8bf7d9643fc5d40dc81497028 +size 384 diff --git a/margin_logs/step_0000108.npy b/margin_logs/step_0000108.npy new file mode 100644 index 0000000..d58812b --- /dev/null +++ b/margin_logs/step_0000108.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a3d1327683e4d328d32057a97440bfd288646580430a8f959739f1ca8db008 +size 384 diff --git a/margin_logs/step_0000109.npy b/margin_logs/step_0000109.npy new file mode 100644 index 0000000..e148c9c --- /dev/null +++ b/margin_logs/step_0000109.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc06826097466acf60b964847956ffaf9e8d71823833965b6c1b417601469397 +size 384 diff --git a/margin_logs/step_0000110.npy b/margin_logs/step_0000110.npy new file mode 100644 index 0000000..64c4605 --- /dev/null +++ b/margin_logs/step_0000110.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef752979792042df35e9ee677501fc072877e7b5e6ab0a632124573172cf8962 +size 384 diff --git a/margin_logs/step_0000111.npy b/margin_logs/step_0000111.npy new file mode 100644 index 0000000..ea1c74b --- /dev/null +++ b/margin_logs/step_0000111.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45219f26fdea87eb3c8c070e762074fecdebf0cd2c258577e1e559bec075d246 +size 384 diff --git a/margin_logs/step_0000112.npy b/margin_logs/step_0000112.npy new file mode 100644 index 0000000..b5d313e --- /dev/null +++ b/margin_logs/step_0000112.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f95c1345dbd6cd3fab0865fed0550824ea4424d557b3e54dcb702c416dcd196 +size 384 diff --git a/margin_logs/step_0000113.npy b/margin_logs/step_0000113.npy new file mode 100644 index 0000000..8f4a356 --- /dev/null +++ b/margin_logs/step_0000113.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a9d220f1205ee5b5b7478458b9b8783b8b0b74faf462f74ee973091b8d9fcc +size 384 diff --git a/margin_logs/step_0000114.npy b/margin_logs/step_0000114.npy new file mode 100644 index 0000000..8858a9d --- /dev/null +++ b/margin_logs/step_0000114.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b0f9555ae615ff8553bdd7d19b8536ca93c6b249eb32afb8abc8a18f7e124a +size 384 diff --git a/margin_logs/step_0000115.npy b/margin_logs/step_0000115.npy new file mode 100644 index 0000000..3edcacc --- /dev/null +++ b/margin_logs/step_0000115.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78bd99262551cad31ac5e03616f47d8836bf2df1a7d0b381872456abdb0a2c1 +size 384 diff --git a/margin_logs/step_0000116.npy b/margin_logs/step_0000116.npy new file mode 100644 index 0000000..25fb812 --- /dev/null +++ b/margin_logs/step_0000116.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:845c561dda9450374df759ef0807e3a07e6724256992fef682d0dc77d4fe5c2c +size 384 diff --git a/margin_logs/step_0000117.npy b/margin_logs/step_0000117.npy new file mode 100644 index 0000000..4721a99 --- /dev/null +++ b/margin_logs/step_0000117.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d5ae3b8b596e969c403140d5164b71f30c90cfa50c6465ba6b83d8365205e6 +size 384 diff --git a/margin_logs/step_0000118.npy b/margin_logs/step_0000118.npy new file mode 100644 index 0000000..ee8703c --- /dev/null +++ b/margin_logs/step_0000118.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a19f71e7fc9f5f1f35f3f5d42861478f619066e0b321e9ebae09eca0363e90f +size 384 diff --git a/margin_logs/step_0000119.npy b/margin_logs/step_0000119.npy new file mode 100644 index 0000000..1e1d41f --- /dev/null +++ b/margin_logs/step_0000119.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa16effb3456fa9ff6f94cc374f5052b328f638949bbcbfe443eae94bfe9c3cd +size 384 diff --git a/margin_logs/step_0000120.npy b/margin_logs/step_0000120.npy new file mode 100644 index 0000000..d035a96 --- /dev/null +++ b/margin_logs/step_0000120.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14b24fab044b7e681be8354f2fc023e637f34a1fb10fba6714fa0ac6a608cf4e +size 384 diff --git a/margin_logs/step_0000121.npy b/margin_logs/step_0000121.npy new file mode 100644 index 0000000..ba057ae --- /dev/null +++ b/margin_logs/step_0000121.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b9de16b44341a86e0f69bc1692afe316361d2e9c1cdcdb343671f3276c5d3a +size 384 diff --git a/margin_logs/step_0000122.npy b/margin_logs/step_0000122.npy new file mode 100644 index 0000000..0cf4f6d --- /dev/null +++ b/margin_logs/step_0000122.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3435bb2399e400d5690fc8c3cdaeb3ba040f9bed5b3556f7ea5bb2f27a0566 +size 384 diff --git a/margin_logs/step_0000123.npy b/margin_logs/step_0000123.npy new file mode 100644 index 0000000..98737ad --- /dev/null +++ b/margin_logs/step_0000123.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:994c3ce32490423417139dcb50d40ee3f4ebf2b16f11a462d5fab4bb3d31fc82 +size 384 diff --git a/margin_logs/step_0000124.npy b/margin_logs/step_0000124.npy new file mode 100644 index 0000000..9f039f3 --- /dev/null +++ b/margin_logs/step_0000124.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2fa80f714bab5420e9b46a33fd89f32d3af8fb5f3670258aafc1acee065e616 +size 384 diff --git a/margin_logs/step_0000125.npy b/margin_logs/step_0000125.npy new file mode 100644 index 0000000..4be62d7 --- /dev/null +++ b/margin_logs/step_0000125.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55b4795c156453b2c4b6c11bcf0697abc4cf94ecab1e6eb0c293e2b7e9ae59c +size 384 diff --git a/margin_logs/step_0000126.npy b/margin_logs/step_0000126.npy new file mode 100644 index 0000000..2d5cf6d --- /dev/null +++ b/margin_logs/step_0000126.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8334a756acf1d8147e38cfb5698c7c3723a7a0230a28ae8899328b4b4c70ab +size 384 diff --git a/margin_logs/step_0000127.npy b/margin_logs/step_0000127.npy new file mode 100644 index 0000000..2272d80 --- /dev/null +++ b/margin_logs/step_0000127.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48efb5709ddc8674952c1995feff09b470fec25c83aa461afdb7ca3c705c48c8 +size 384 diff --git a/margin_logs/step_0000128.npy b/margin_logs/step_0000128.npy new file mode 100644 index 0000000..88101ee --- /dev/null +++ b/margin_logs/step_0000128.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc993c7a29d7bbccaa257073db8e4c994be36c342e9dbb000b3fe0a93ba8210 +size 384 diff --git a/margin_logs/step_0000129.npy b/margin_logs/step_0000129.npy new file mode 100644 index 0000000..1976b97 --- /dev/null +++ b/margin_logs/step_0000129.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaa0c502dac98f5e50940bfdf2b7faa99aa9a09ad391f9c72e203539a28c25f +size 384 diff --git a/margin_logs/step_0000130.npy b/margin_logs/step_0000130.npy new file mode 100644 index 0000000..9c4497f --- /dev/null +++ b/margin_logs/step_0000130.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b7058eb3b58fdd9b07837a09de87c34490fc88733eeb43213989852331883a +size 384 diff --git a/margin_logs/step_0000131.npy b/margin_logs/step_0000131.npy new file mode 100644 index 0000000..dd890d5 --- /dev/null +++ b/margin_logs/step_0000131.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cc795c98cffa5c39dd20531e34c06f2303a5c1913480868975d2ff494e7955 +size 384 diff --git a/margin_logs/step_0000132.npy b/margin_logs/step_0000132.npy new file mode 100644 index 0000000..f4fa9e4 --- /dev/null +++ b/margin_logs/step_0000132.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81cda2af900744f3832dbb0202be5f60093b16860c7bbe0121ccdf3ee53e7af8 +size 384 diff --git a/margin_logs/step_0000133.npy b/margin_logs/step_0000133.npy new file mode 100644 index 0000000..8928787 --- /dev/null +++ b/margin_logs/step_0000133.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1026d8a0728dd15134e9ce779604e44362c72c2d4ad1d9cd53465266dfd58ba +size 384 diff --git a/margin_logs/step_0000134.npy b/margin_logs/step_0000134.npy new file mode 100644 index 0000000..49bb074 --- /dev/null +++ b/margin_logs/step_0000134.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacb2f1848af97927ccf8e1285b7aa48700bbdb2ef2a4efca7347e6202ff26d7 +size 384 diff --git a/margin_logs/step_0000135.npy b/margin_logs/step_0000135.npy new file mode 100644 index 0000000..62ae0a1 --- /dev/null +++ b/margin_logs/step_0000135.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10122861e1639a6df34db899099014d242ce8e5d569659d7ed96257f015b4089 +size 384 diff --git a/margin_logs/step_0000136.npy b/margin_logs/step_0000136.npy new file mode 100644 index 0000000..b003e4a --- /dev/null +++ b/margin_logs/step_0000136.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a5cbf99ab7815101c8c60b34cc7341fe883e45400a4523667c6aa1445f30d4 +size 384 diff --git a/margin_logs/step_0000137.npy b/margin_logs/step_0000137.npy new file mode 100644 index 0000000..a69adc6 --- /dev/null +++ b/margin_logs/step_0000137.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d503dc6233899862673a27cacc508db84ba9221b945460f51a383f858f9d59 +size 384 diff --git a/margin_logs/step_0000138.npy b/margin_logs/step_0000138.npy new file mode 100644 index 0000000..0c0c629 --- /dev/null +++ b/margin_logs/step_0000138.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37bb68c249e1d789175b3d5eb432f653825b02fb612f46d2368551012585ae6 +size 384 diff --git a/margin_logs/step_0000139.npy b/margin_logs/step_0000139.npy new file mode 100644 index 0000000..177e06b --- /dev/null +++ b/margin_logs/step_0000139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aad0cc5cf5ebdedadab63122a065b0b533c1feb3af514dad1a79ee16e2e1d48 +size 384 diff --git a/margin_logs/step_0000140.npy b/margin_logs/step_0000140.npy new file mode 100644 index 0000000..33b3425 --- /dev/null +++ b/margin_logs/step_0000140.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3055ea31893ef65759a00fa9f8144559198ad5428f9ddee5519084aa12507fd9 +size 384 diff --git a/margin_logs/step_0000141.npy b/margin_logs/step_0000141.npy new file mode 100644 index 0000000..9229bcb --- /dev/null +++ b/margin_logs/step_0000141.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34dbef4a5f2feb530bc95879528dda7b3e847066e6534d9724041d9a965ace3 +size 384 diff --git a/margin_logs/step_0000142.npy b/margin_logs/step_0000142.npy new file mode 100644 index 0000000..7e64ff8 --- /dev/null +++ b/margin_logs/step_0000142.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f752d299dfcad9a20883d78b9d0f671f6781cbadfc7db53a48380ed37aa2559 +size 384 diff --git a/margin_logs/step_0000143.npy b/margin_logs/step_0000143.npy new file mode 100644 index 0000000..05aee2a --- /dev/null +++ b/margin_logs/step_0000143.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506a7e6ff0ba0dace48fca70267f8d9385ce7c12e7b00da4457acff431dc6b8f +size 384 diff --git a/margin_logs/step_0000144.npy b/margin_logs/step_0000144.npy new file mode 100644 index 0000000..0ac751a --- /dev/null +++ b/margin_logs/step_0000144.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4a9d4bce3d023ca7ded23ae675a79887cd2ba5a7a4978c6044889f5f181db0b +size 384 diff --git a/margin_logs/step_0000145.npy b/margin_logs/step_0000145.npy new file mode 100644 index 0000000..5c3ab96 --- /dev/null +++ b/margin_logs/step_0000145.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b176515af3cda7b501828bdb750ed62a5fbe69f6c74616a997d2e2543c1ffc9 +size 384 diff --git a/margin_logs/step_0000146.npy b/margin_logs/step_0000146.npy new file mode 100644 index 0000000..f6b2d67 --- /dev/null +++ b/margin_logs/step_0000146.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30579be26186f39e4c84340e4129da19fe3564f77517b12609fd9980dc838355 +size 384 diff --git a/margin_logs/step_0000147.npy b/margin_logs/step_0000147.npy new file mode 100644 index 0000000..0bf96e1 --- /dev/null +++ b/margin_logs/step_0000147.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e152b104cc4f8099d1b9bc3446b8c54a9ce7f1504019bae75d189ac71a0e86 +size 384 diff --git a/margin_logs/step_0000148.npy b/margin_logs/step_0000148.npy new file mode 100644 index 0000000..497caff --- /dev/null +++ b/margin_logs/step_0000148.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:069eb60d6a8cfc41c0266eaa7c021410a144bf90ef3e14e75bd7ab8ba3916d60 +size 384 diff --git a/margin_logs/step_0000149.npy b/margin_logs/step_0000149.npy new file mode 100644 index 0000000..c4063c3 --- /dev/null +++ b/margin_logs/step_0000149.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa64fc1b80144f3876ae0c12b4e665220864d54e56fa7f2b70740801240701c6 +size 384 diff --git a/margin_logs/step_0000150.npy b/margin_logs/step_0000150.npy new file mode 100644 index 0000000..833226d --- /dev/null +++ b/margin_logs/step_0000150.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5789fe9cf0d575a3bf6f7448e8a290c84e178e64a3b7e37c0956dac7d922c61 +size 384 diff --git a/margin_logs/step_0000151.npy b/margin_logs/step_0000151.npy new file mode 100644 index 0000000..a7e4a51 --- /dev/null +++ b/margin_logs/step_0000151.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc005d145e95d2457298575aef96a6b008cb98857c6635b14d402383ddfbee30 +size 384 diff --git a/margin_logs/step_0000152.npy b/margin_logs/step_0000152.npy new file mode 100644 index 0000000..297d442 --- /dev/null +++ b/margin_logs/step_0000152.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f796ec4f93279d958e9d86134bbd9028488ca5b86e3bd90fee4bfa38855c37 +size 384 diff --git a/margin_logs/step_0000153.npy b/margin_logs/step_0000153.npy new file mode 100644 index 0000000..8859710 --- /dev/null +++ b/margin_logs/step_0000153.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5c200524634aef01e40505bc2477c0981626a080358fae424e89d36b3cc488 +size 384 diff --git a/margin_logs/step_0000154.npy b/margin_logs/step_0000154.npy new file mode 100644 index 0000000..1300288 --- /dev/null +++ b/margin_logs/step_0000154.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b41f731d3adf3657ed2fda884e99097283473098972f35173343cffd238fa37 +size 384 diff --git a/margin_logs/step_0000155.npy b/margin_logs/step_0000155.npy new file mode 100644 index 0000000..d6bf43a --- /dev/null +++ b/margin_logs/step_0000155.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a5407c80f28f57822f5baba3576cb38978cf2b7c6079d319a605cfd44e55e2 +size 384 diff --git a/margin_logs/step_0000156.npy b/margin_logs/step_0000156.npy new file mode 100644 index 0000000..9ad1b73 --- /dev/null +++ b/margin_logs/step_0000156.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e1bc7d1e699c02c079aa88e0f89cca7128a5d5c5efd8cb4512f5d118f1274d +size 384 diff --git a/margin_logs/step_0000157.npy b/margin_logs/step_0000157.npy new file mode 100644 index 0000000..a3c6a83 --- /dev/null +++ b/margin_logs/step_0000157.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a46ad00cbe77b697036ca5eb4e6e4d34dfced3e351ba8ce3fd555deed4ed811 +size 384 diff --git a/margin_logs/step_0000158.npy b/margin_logs/step_0000158.npy new file mode 100644 index 0000000..8da548c --- /dev/null +++ b/margin_logs/step_0000158.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3675c44c7404beb1cd5b86e0ce0065d61389efe1b93b1cc70d2dff2994c966a +size 384 diff --git a/margin_logs/step_0000159.npy b/margin_logs/step_0000159.npy new file mode 100644 index 0000000..5b4c96c --- /dev/null +++ b/margin_logs/step_0000159.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb926eee69d6481467b625d2d7a23bb65edc2a051c86dadaa703e9a16d7f9b3 +size 384 diff --git a/margin_logs/step_0000160.npy b/margin_logs/step_0000160.npy new file mode 100644 index 0000000..7e86fe9 --- /dev/null +++ b/margin_logs/step_0000160.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf03aee34ac1204bfb839a4593e392968fceddfa01371a9b0d293aaca9ee932d +size 384 diff --git a/margin_logs/step_0000161.npy b/margin_logs/step_0000161.npy new file mode 100644 index 0000000..dfd361a --- /dev/null +++ b/margin_logs/step_0000161.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8b4478d6d0832f9e6573fb06d836c9eb3e779d1dec4be6f1d9322eabc0cfb1 +size 384 diff --git a/margin_logs/step_0000162.npy b/margin_logs/step_0000162.npy new file mode 100644 index 0000000..0602109 --- /dev/null +++ b/margin_logs/step_0000162.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c87120135fabe91c424f634dd2599d8cde7dfc345fb4b4e5d40d2b7f2850f73 +size 384 diff --git a/margin_logs/step_0000163.npy b/margin_logs/step_0000163.npy new file mode 100644 index 0000000..f6e1745 --- /dev/null +++ b/margin_logs/step_0000163.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50aa32e75af449e55494f30b34306fd3b6933e47897cf2df9e60f0fde5b6bcf +size 384 diff --git a/margin_logs/step_0000164.npy b/margin_logs/step_0000164.npy new file mode 100644 index 0000000..3c45e46 --- /dev/null +++ b/margin_logs/step_0000164.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d78a4f4819a265ee738608f84d36060ad3fbf1c0a9601c126f95fa2e6393a0 +size 384 diff --git a/margin_logs/step_0000165.npy b/margin_logs/step_0000165.npy new file mode 100644 index 0000000..adc6434 --- /dev/null +++ b/margin_logs/step_0000165.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac96a72aaf8a183eccdb4ee989862eb7d88fb5712674a679a7f2c91fd0cc51c +size 384 diff --git a/margin_logs/step_0000166.npy b/margin_logs/step_0000166.npy new file mode 100644 index 0000000..8a9d8bc --- /dev/null +++ b/margin_logs/step_0000166.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323e58c7629e8ed2d959843bd922a69a5dcd6b351c8aa82e2a1b2209a4004fe1 +size 384 diff --git a/margin_logs/step_0000167.npy b/margin_logs/step_0000167.npy new file mode 100644 index 0000000..3913d0d --- /dev/null +++ b/margin_logs/step_0000167.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a21aec60f3460c10f5de5cf2c2dacbe6db7352ea7b84e65b04f6ca46f521a7 +size 384 diff --git a/margin_logs/step_0000168.npy b/margin_logs/step_0000168.npy new file mode 100644 index 0000000..0c9376c --- /dev/null +++ b/margin_logs/step_0000168.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06dab63e4b310842ccde796574154c64a600735b0dc29c94356857d46def66a1 +size 384 diff --git a/margin_logs/step_0000169.npy b/margin_logs/step_0000169.npy new file mode 100644 index 0000000..fa321e4 --- /dev/null +++ b/margin_logs/step_0000169.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e36f7c49c7d967615485812449bc25001fe977cbf5c2654c39f3adb3e79e68 +size 384 diff --git a/margin_logs/step_0000170.npy b/margin_logs/step_0000170.npy new file mode 100644 index 0000000..498abee --- /dev/null +++ b/margin_logs/step_0000170.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4caa033738fb590ee7d4cf2579ef9f8a399c2e2a844aabb62aaf48d59a3ba4a +size 384 diff --git a/margin_logs/step_0000171.npy b/margin_logs/step_0000171.npy new file mode 100644 index 0000000..fb189d3 --- /dev/null +++ b/margin_logs/step_0000171.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac78f104f40921d13df873d14157601ddaddc5e88f12083f146b12a8a9e5dfa +size 384 diff --git a/margin_logs/step_0000172.npy b/margin_logs/step_0000172.npy new file mode 100644 index 0000000..d85f57b --- /dev/null +++ b/margin_logs/step_0000172.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50205831bb7a36be709974c2933e9c7665a40317a7a5f1a3acd213cc3f4b8b18 +size 384 diff --git a/margin_logs/step_0000173.npy b/margin_logs/step_0000173.npy new file mode 100644 index 0000000..fb8ca53 --- /dev/null +++ b/margin_logs/step_0000173.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a5be70d5d6e14c4ab0fad3b85b660f8d9cf12d431f085990b3f4f18ea866cf +size 384 diff --git a/margin_logs/step_0000174.npy b/margin_logs/step_0000174.npy new file mode 100644 index 0000000..cc891fe --- /dev/null +++ b/margin_logs/step_0000174.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecae3d9d83e3d8957db49ad63631171e4225a718778824b3a782693c2034f1dd +size 384 diff --git a/margin_logs/step_0000175.npy b/margin_logs/step_0000175.npy new file mode 100644 index 0000000..c953814 --- /dev/null +++ b/margin_logs/step_0000175.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5273abd5bc2456a4ebbf55134b1a656561436675fc8880e29688e61cd4d047b +size 384 diff --git a/margin_logs/step_0000176.npy b/margin_logs/step_0000176.npy new file mode 100644 index 0000000..1733047 --- /dev/null +++ b/margin_logs/step_0000176.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a1dc44fe0681d675d9c98cd82ac77c6be081c75ba8208146809f719b768fdf +size 384 diff --git a/margin_logs/step_0000177.npy b/margin_logs/step_0000177.npy new file mode 100644 index 0000000..66ed4e5 --- /dev/null +++ b/margin_logs/step_0000177.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ab15faca6422e4dc4940fb96717aa335d679d1a0604a608831e19a159c999f +size 384 diff --git a/margin_logs/step_0000178.npy b/margin_logs/step_0000178.npy new file mode 100644 index 0000000..543f9cd --- /dev/null +++ b/margin_logs/step_0000178.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b317853ace09c7e84c4824c00ccb0eceea4f06514f5b1fb62ab07eec5092046 +size 384 diff --git a/margin_logs/step_0000179.npy b/margin_logs/step_0000179.npy new file mode 100644 index 0000000..2a11c3d --- /dev/null +++ b/margin_logs/step_0000179.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d733d5f18800255295d92314d3b4df9ae27d8905605d33ed1b2d970f688fc33 +size 384 diff --git a/margin_logs/step_0000180.npy b/margin_logs/step_0000180.npy new file mode 100644 index 0000000..8053877 --- /dev/null +++ b/margin_logs/step_0000180.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3a136aec1dabdc858e61dcd23bb467ddb27efe815ca8ce5bd23171f95f1c9d +size 384 diff --git a/margin_logs/step_0000181.npy b/margin_logs/step_0000181.npy new file mode 100644 index 0000000..decd24a --- /dev/null +++ b/margin_logs/step_0000181.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165fbf1f44f3df9630cdabd15e6b1e5ee04b3afadbaa91046277deaa47606a73 +size 384 diff --git a/margin_logs/step_0000182.npy b/margin_logs/step_0000182.npy new file mode 100644 index 0000000..2372a82 --- /dev/null +++ b/margin_logs/step_0000182.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842cc497df9b95de2b0cb8690c8c9ebeef980aa9c4badb17008c56e77f907f09 +size 384 diff --git a/margin_logs/step_0000183.npy b/margin_logs/step_0000183.npy new file mode 100644 index 0000000..c82932f --- /dev/null +++ b/margin_logs/step_0000183.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76b85ae4b0494180e9c6dd638f00f49fd9d00641acd276059fd89500d8ce4b7 +size 384 diff --git a/margin_logs/step_0000184.npy b/margin_logs/step_0000184.npy new file mode 100644 index 0000000..032e237 --- /dev/null +++ b/margin_logs/step_0000184.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53930e11edd6666b22c9c2909fa0b912a72e2714c5cae570f6e38b6801611ca +size 384 diff --git a/margin_logs/step_0000185.npy b/margin_logs/step_0000185.npy new file mode 100644 index 0000000..32a1b55 --- /dev/null +++ b/margin_logs/step_0000185.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8143fc3358e1f43eeb64bff3371c89366ce5fc222b683af34ff433292631dfa6 +size 384 diff --git a/margin_logs/step_0000186.npy b/margin_logs/step_0000186.npy new file mode 100644 index 0000000..9f511c6 --- /dev/null +++ b/margin_logs/step_0000186.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ce2d6367cd28e8b2379d8eabba679ed942c3a3a75d25d255ed28cfc194755c +size 384 diff --git a/margin_logs/step_0000187.npy b/margin_logs/step_0000187.npy new file mode 100644 index 0000000..68fd7aa --- /dev/null +++ b/margin_logs/step_0000187.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87413e6a07e60c25a3d355ede292941faaf20216b6ba9efe8f897e99a3126938 +size 384 diff --git a/margin_logs/step_0000188.npy b/margin_logs/step_0000188.npy new file mode 100644 index 0000000..1ff91d2 --- /dev/null +++ b/margin_logs/step_0000188.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d127f0e13a7033ae551aec9450a53616d6ba76f6cd7540928e2dd830b7cee613 +size 384 diff --git a/margin_logs/step_0000189.npy b/margin_logs/step_0000189.npy new file mode 100644 index 0000000..5e6d8b2 --- /dev/null +++ b/margin_logs/step_0000189.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75013cc2697534e3f7989b49efb8ae834c56a82e9d44fbc019334bfea6df49cf +size 384 diff --git a/margin_logs/step_0000190.npy b/margin_logs/step_0000190.npy new file mode 100644 index 0000000..84d2d91 --- /dev/null +++ b/margin_logs/step_0000190.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1173129d06af9168bbd3b2b5c7296c62baa000f75b2d81d3283e9d8e514f79 +size 384 diff --git a/margin_logs/step_0000191.npy b/margin_logs/step_0000191.npy new file mode 100644 index 0000000..6d00f39 --- /dev/null +++ b/margin_logs/step_0000191.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ab600a0e1b1203e956045cb6f68e32e92649045621d56955da962cc8925299 +size 384 diff --git a/margin_logs/step_0000192.npy b/margin_logs/step_0000192.npy new file mode 100644 index 0000000..3182d27 --- /dev/null +++ b/margin_logs/step_0000192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93bcb3d02b367034b8ee5e1393a61cabf728224aa224d1c1faf70a5a0f1391c +size 384 diff --git a/margin_logs/step_0000193.npy b/margin_logs/step_0000193.npy new file mode 100644 index 0000000..df7fcdc --- /dev/null +++ b/margin_logs/step_0000193.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab959db972aebb4bdcaf8af51e5149a69db64e2b9039b7e36c810a8928cfad41 +size 384 diff --git a/margin_logs/step_0000194.npy b/margin_logs/step_0000194.npy new file mode 100644 index 0000000..82e76ee --- /dev/null +++ b/margin_logs/step_0000194.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481142a2f41d5f9115a62e20eb4932cc2398f261f999b473f09af4814a2a8703 +size 384 diff --git a/margin_logs/step_0000195.npy b/margin_logs/step_0000195.npy new file mode 100644 index 0000000..261013e --- /dev/null +++ b/margin_logs/step_0000195.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1190b8a1f1c160f73236b56ed517e1c363d45c5d28cad4516e136787446348c +size 384 diff --git a/margin_logs/step_0000196.npy b/margin_logs/step_0000196.npy new file mode 100644 index 0000000..ac51058 --- /dev/null +++ b/margin_logs/step_0000196.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5aa61acb7c5da9940e7f0b2f63e3e8fbec60c397e809928b4d0ec7e39001eec +size 384 diff --git a/margin_logs/step_0000197.npy b/margin_logs/step_0000197.npy new file mode 100644 index 0000000..a234592 --- /dev/null +++ b/margin_logs/step_0000197.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affe4ab9d41dc842646b52b336e35c0e3f341a0a4d846f49e4f7a6397173b50f +size 384 diff --git a/margin_logs/step_0000198.npy b/margin_logs/step_0000198.npy new file mode 100644 index 0000000..b91ed7e --- /dev/null +++ b/margin_logs/step_0000198.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a4e2fa6f689acdfed3ecaa10df7dcd492fb7b98a490de0cb153f57adabeb1d +size 384 diff --git a/margin_logs/step_0000199.npy b/margin_logs/step_0000199.npy new file mode 100644 index 0000000..35a6bd0 --- /dev/null +++ b/margin_logs/step_0000199.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3122edd389a42b44283eda7fbed5f6b6405778819cbe28bf23655eb1b8e599 +size 384 diff --git a/margin_logs/step_0000200.npy b/margin_logs/step_0000200.npy new file mode 100644 index 0000000..096b506 --- /dev/null +++ b/margin_logs/step_0000200.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a2fa86167fa51bf8c1ac46509c7eae929b4a5c981241250ffb47275f0b54e5 +size 384 diff --git a/margin_logs/step_0000201.npy b/margin_logs/step_0000201.npy new file mode 100644 index 0000000..d06fa09 --- /dev/null +++ b/margin_logs/step_0000201.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f08a461bc1e5017e463cb10883e7fc124e1db34e1787638888ef1bcdd957884 +size 384 diff --git a/margin_logs/step_0000202.npy b/margin_logs/step_0000202.npy new file mode 100644 index 0000000..605631d --- /dev/null +++ b/margin_logs/step_0000202.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859622b4bf777bf79577dd189cd4f7b1d0974c6b59e1b39c4533c06e30029b1b +size 384 diff --git a/margin_logs/step_0000203.npy b/margin_logs/step_0000203.npy new file mode 100644 index 0000000..d668dc0 --- /dev/null +++ b/margin_logs/step_0000203.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e0f96af7779c771178d3f5f500796312c6dc72f8001eefb3bb1c72bbf5b5b3 +size 384 diff --git a/margin_logs/step_0000204.npy b/margin_logs/step_0000204.npy new file mode 100644 index 0000000..84cc4f0 --- /dev/null +++ b/margin_logs/step_0000204.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c25a03a35159181e3b1ae96d868dac23eb248977c36c540d0ccf873431d8fbe +size 384 diff --git a/margin_logs/step_0000205.npy b/margin_logs/step_0000205.npy new file mode 100644 index 0000000..61d832e --- /dev/null +++ b/margin_logs/step_0000205.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b51421e1df2bf948cb903a8e19f10a71b32a5c5a43f100eaf84aa5ba530f963 +size 384 diff --git a/margin_logs/step_0000206.npy b/margin_logs/step_0000206.npy new file mode 100644 index 0000000..d6c5324 --- /dev/null +++ b/margin_logs/step_0000206.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31c4a91cd10c72eca87a48ddb44f2a1aad289d5174815b7f505a53b0e9e3f91 +size 384 diff --git a/margin_logs/step_0000207.npy b/margin_logs/step_0000207.npy new file mode 100644 index 0000000..b2512f5 --- /dev/null +++ b/margin_logs/step_0000207.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:814bfe56c1a69519e8fb06dd841af23ca5748e03b8ea52f896cb3f2a8672f86a +size 384 diff --git a/margin_logs/step_0000208.npy b/margin_logs/step_0000208.npy new file mode 100644 index 0000000..ede8814 --- /dev/null +++ b/margin_logs/step_0000208.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e0ac677cb2a52998973fd75ee99404449428b84556b90c9720b16dd49cf787 +size 384 diff --git a/margin_logs/step_0000209.npy b/margin_logs/step_0000209.npy new file mode 100644 index 0000000..a4eacf4 --- /dev/null +++ b/margin_logs/step_0000209.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7e4d52994a4e96edac842192545aea8419813c6c42ecc5b0d156e16acb841a +size 384 diff --git a/margin_logs/step_0000210.npy b/margin_logs/step_0000210.npy new file mode 100644 index 0000000..536eab4 --- /dev/null +++ b/margin_logs/step_0000210.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519be545e38ad0c3a436dd5d8160444924309d007832d66c4be4a3258809ef86 +size 384 diff --git a/margin_logs/step_0000211.npy b/margin_logs/step_0000211.npy new file mode 100644 index 0000000..80d5076 --- /dev/null +++ b/margin_logs/step_0000211.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0388cc47b6ff262453c8ea8f6c805f5fe9614cf602fe4d953eaf003cfe83f6da +size 384 diff --git a/margin_logs/step_0000212.npy b/margin_logs/step_0000212.npy new file mode 100644 index 0000000..31e52ab --- /dev/null +++ b/margin_logs/step_0000212.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4191f19b5cbd3296ed266aa7b6c3434e44fe84190e6451dbcba2d131e6024c +size 384 diff --git a/margin_logs/step_0000213.npy b/margin_logs/step_0000213.npy new file mode 100644 index 0000000..f4a96d7 --- /dev/null +++ b/margin_logs/step_0000213.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27af477ae7e927c907a9f8900a1a34ed844c2d566cd3fb396cbe05dba470c4a9 +size 384 diff --git a/margin_logs/step_0000214.npy b/margin_logs/step_0000214.npy new file mode 100644 index 0000000..e2a739e --- /dev/null +++ b/margin_logs/step_0000214.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da167a09432e132ae7967408cbc56467b087485e66f31d16aad043809395948 +size 384 diff --git a/margin_logs/step_0000215.npy b/margin_logs/step_0000215.npy new file mode 100644 index 0000000..c9cac4e --- /dev/null +++ b/margin_logs/step_0000215.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7aeaafb88bfaf40bf98dda801ebf1a30fce6e5eb259ba4e70eaaee37c8fdb5 +size 384 diff --git a/margin_logs/step_0000216.npy b/margin_logs/step_0000216.npy new file mode 100644 index 0000000..83095e8 --- /dev/null +++ b/margin_logs/step_0000216.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136e9e812533d203b111da39905a9ef06346e14b4a7d1fcf455b65b915c89dfb +size 384 diff --git a/margin_logs/step_0000217.npy b/margin_logs/step_0000217.npy new file mode 100644 index 0000000..72d6932 --- /dev/null +++ b/margin_logs/step_0000217.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3450780b74af76da26098ca83af905aa4518162d9f351a125204200eca606f88 +size 384 diff --git a/margin_logs/step_0000218.npy b/margin_logs/step_0000218.npy new file mode 100644 index 0000000..45039ba --- /dev/null +++ b/margin_logs/step_0000218.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2b4fc47c5decdbc22c0dbd3d2a45163e3f09da38ce562eabe36ba155350922 +size 384 diff --git a/margin_logs/step_0000219.npy b/margin_logs/step_0000219.npy new file mode 100644 index 0000000..87b8a3c --- /dev/null +++ b/margin_logs/step_0000219.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96654870bc5687d2828286af4449be37c69794f6dfad8edabfca13148e921b4a +size 384 diff --git a/margin_logs/step_0000220.npy b/margin_logs/step_0000220.npy new file mode 100644 index 0000000..ecb070b --- /dev/null +++ b/margin_logs/step_0000220.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c34291fdace3b3cc8e6a64c1c25e726a65b4fd020136a9cf278db24be5d359 +size 384 diff --git a/margin_logs/step_0000221.npy b/margin_logs/step_0000221.npy new file mode 100644 index 0000000..f98e72c --- /dev/null +++ b/margin_logs/step_0000221.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c927b86c94814ce706317dfdea3aa6319d37a2873b3c964afa017674b67948 +size 384 diff --git a/margin_logs/step_0000222.npy b/margin_logs/step_0000222.npy new file mode 100644 index 0000000..fe064c2 --- /dev/null +++ b/margin_logs/step_0000222.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca429eb66621cd1015e445fbc83e89fa3feeda3c215dc2420eb09e98b3d3f66 +size 384 diff --git a/margin_logs/step_0000223.npy b/margin_logs/step_0000223.npy new file mode 100644 index 0000000..8298983 --- /dev/null +++ b/margin_logs/step_0000223.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af718c9f921969efcb7b3619b2d7ae89f564ac2f6ba1a437172a3e67ca7088de +size 384 diff --git a/margin_logs/step_0000224.npy b/margin_logs/step_0000224.npy new file mode 100644 index 0000000..ee4f584 --- /dev/null +++ b/margin_logs/step_0000224.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8df0d404dc4425fb9017ec07901ab0d5eb105d36e77e6fc1ad2ff1a51777253 +size 384 diff --git a/margin_logs/step_0000225.npy b/margin_logs/step_0000225.npy new file mode 100644 index 0000000..f167f1b --- /dev/null +++ b/margin_logs/step_0000225.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4c549678029e69db33e0eaea617d785b69d6c163d7b066009e2738cb0dbb02 +size 384 diff --git a/margin_logs/step_0000226.npy b/margin_logs/step_0000226.npy new file mode 100644 index 0000000..92177a3 --- /dev/null +++ b/margin_logs/step_0000226.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f6715b8920452c241eb39781465b63bbfd7b390012e46afa136872c284e4e4 +size 384 diff --git a/margin_logs/step_0000227.npy b/margin_logs/step_0000227.npy new file mode 100644 index 0000000..8cebe67 --- /dev/null +++ b/margin_logs/step_0000227.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:342c5082181cd4c12ca8c7c04f286252cfdcd2ecf3680c8170b4157cdecd8aa7 +size 384 diff --git a/margin_logs/step_0000228.npy b/margin_logs/step_0000228.npy new file mode 100644 index 0000000..4fa9aee --- /dev/null +++ b/margin_logs/step_0000228.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc33857fd0995e62cda2d1a949c3f3e026bec23504eb9edfc1a3319e78e5d32 +size 384 diff --git a/margin_logs/step_0000229.npy b/margin_logs/step_0000229.npy new file mode 100644 index 0000000..2ab45b6 --- /dev/null +++ b/margin_logs/step_0000229.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1e9be64954e5e5035d3ef94c5068a65b142c721ded69c1e1c80368e7d59d4a +size 384 diff --git a/margin_logs/step_0000230.npy b/margin_logs/step_0000230.npy new file mode 100644 index 0000000..d09cb46 --- /dev/null +++ b/margin_logs/step_0000230.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da89db9ea2dc15c28397cfea5320376ca79703a3f7050315ac1abf06513986a4 +size 384 diff --git a/margin_logs/step_0000231.npy b/margin_logs/step_0000231.npy new file mode 100644 index 0000000..1021dae --- /dev/null +++ b/margin_logs/step_0000231.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4343e0d00b27a40882d3d620f6b0691a842fd210d140cb75bf73729bf3d2db +size 384 diff --git a/margin_logs/step_0000232.npy b/margin_logs/step_0000232.npy new file mode 100644 index 0000000..f087219 --- /dev/null +++ b/margin_logs/step_0000232.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b1d5ca5171c43b1eeae3975dc739fab8cf319a42f101ce6be5d89c7345b4c5 +size 384 diff --git a/margin_logs/step_0000233.npy b/margin_logs/step_0000233.npy new file mode 100644 index 0000000..60c27e3 --- /dev/null +++ b/margin_logs/step_0000233.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c630ddcc8cea7ddc7cabc5827da256b4647d08cbd215a2bf302698821fa8889 +size 384 diff --git a/margin_logs/step_0000234.npy b/margin_logs/step_0000234.npy new file mode 100644 index 0000000..37f4dae --- /dev/null +++ b/margin_logs/step_0000234.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5016483f7b9b46bf783d1439db04af2f971e666a573be8d9a05e78213207c1 +size 384 diff --git a/margin_logs/step_0000235.npy b/margin_logs/step_0000235.npy new file mode 100644 index 0000000..e5f1928 --- /dev/null +++ b/margin_logs/step_0000235.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab0a2c9e2344d89b963c5d429d9f9f62cc2e9b414a7265f78821abeae9f7b25 +size 384 diff --git a/margin_logs/step_0000236.npy b/margin_logs/step_0000236.npy new file mode 100644 index 0000000..190974e --- /dev/null +++ b/margin_logs/step_0000236.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98dd58165050a77ae34fdbdb9f5dfac68528ab06e02d947a8f704c26ba09e6a +size 384 diff --git a/margin_logs/step_0000237.npy b/margin_logs/step_0000237.npy new file mode 100644 index 0000000..76397bc --- /dev/null +++ b/margin_logs/step_0000237.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e945ffc9b738721e81f884f288c6b926f07afd795c767d64f3f22de4e6c2fcd4 +size 384 diff --git a/margin_logs/step_0000238.npy b/margin_logs/step_0000238.npy new file mode 100644 index 0000000..11c0b71 --- /dev/null +++ b/margin_logs/step_0000238.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2735c40339cdcf1f7b5845896557eed82481bc1974665b8d6f5bee0ffe88f840 +size 384 diff --git a/margin_logs/step_0000239.npy b/margin_logs/step_0000239.npy new file mode 100644 index 0000000..bb1506f --- /dev/null +++ b/margin_logs/step_0000239.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2e4a3620897ba222ce991a9f916e67896407a966a79c433e50d87ab49c94a2 +size 384 diff --git a/margin_logs/step_0000240.npy b/margin_logs/step_0000240.npy new file mode 100644 index 0000000..3e98365 --- /dev/null +++ b/margin_logs/step_0000240.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d1d6b4867080718496553a6fe193a88a45efd4766cffb4c7662491e9b0b09b0 +size 384 diff --git a/margin_logs/step_0000241.npy b/margin_logs/step_0000241.npy new file mode 100644 index 0000000..88f88cb --- /dev/null +++ b/margin_logs/step_0000241.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2a67b3ac0ce8c12808b9aa7666e0ebbcdcf49d553423fc49fb39aed0c47617 +size 384 diff --git a/margin_logs/step_0000242.npy b/margin_logs/step_0000242.npy new file mode 100644 index 0000000..bd6b7ef --- /dev/null +++ b/margin_logs/step_0000242.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc444c379bdcd756d262a24d290d940beb4843bd3c37a84a9df1ac68ce03504 +size 384 diff --git a/margin_logs/step_0000243.npy b/margin_logs/step_0000243.npy new file mode 100644 index 0000000..6c54c76 --- /dev/null +++ b/margin_logs/step_0000243.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e6e633cf6a346b27dad98578a5537c19fd0bf8bc0ccc9db63a139b97200b24 +size 384 diff --git a/margin_logs/step_0000244.npy b/margin_logs/step_0000244.npy new file mode 100644 index 0000000..0455ada --- /dev/null +++ b/margin_logs/step_0000244.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b1be65e088b876368a7f4ccc94202636208952f6077610ea0b641b41598181 +size 384 diff --git a/margin_logs/step_0000245.npy b/margin_logs/step_0000245.npy new file mode 100644 index 0000000..4c5f281 --- /dev/null +++ b/margin_logs/step_0000245.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f3aac18dedeaccac81070ced0447dda31d5fdf2515b979c80d29f7bbb22679 +size 384 diff --git a/margin_logs/step_0000246.npy b/margin_logs/step_0000246.npy new file mode 100644 index 0000000..9fbed21 --- /dev/null +++ b/margin_logs/step_0000246.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3da1014ac98205900aea4a485369d20d905df0d6d16e718c2c6edbc74c8926 +size 384 diff --git a/margin_logs/step_0000247.npy b/margin_logs/step_0000247.npy new file mode 100644 index 0000000..ea151e7 --- /dev/null +++ b/margin_logs/step_0000247.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee0932ea72aef34b0143c61472a529d74faa98eaa8a73f43c9fad2043b57314 +size 384 diff --git a/margin_logs/step_0000248.npy b/margin_logs/step_0000248.npy new file mode 100644 index 0000000..f52d7e5 --- /dev/null +++ b/margin_logs/step_0000248.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f999a1637d7dc6f3ec6815746c25c5d066fa82892f45a2d3ebf49f4d0e5308a +size 384 diff --git a/margin_logs/step_0000249.npy b/margin_logs/step_0000249.npy new file mode 100644 index 0000000..8fbb6a9 --- /dev/null +++ b/margin_logs/step_0000249.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d48237d99461110400c0596d3763e02cce9970fcbf0db0bf538f63f15b79414 +size 384 diff --git a/margin_logs/step_0000250.npy b/margin_logs/step_0000250.npy new file mode 100644 index 0000000..40f174a --- /dev/null +++ b/margin_logs/step_0000250.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25825f439e3a1d6c5ca029f8d71800cd16a4441ca5d18dd101b1d61a49940c1 +size 384 diff --git a/margin_logs/step_0000251.npy b/margin_logs/step_0000251.npy new file mode 100644 index 0000000..dc9b756 --- /dev/null +++ b/margin_logs/step_0000251.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac571914afed53bce9e1a7e98726c941ce1535aa267969c7a2237b5931d39cee +size 384 diff --git a/margin_logs/step_0000252.npy b/margin_logs/step_0000252.npy new file mode 100644 index 0000000..303dc40 --- /dev/null +++ b/margin_logs/step_0000252.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c0230e41508b3d612df74b5973f69cfc9c2161f3d9f1f4a39242ee7d7c0543 +size 384 diff --git a/margin_logs/step_0000253.npy b/margin_logs/step_0000253.npy new file mode 100644 index 0000000..40c161f --- /dev/null +++ b/margin_logs/step_0000253.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8351f14e13a9450e9c64a983a17ca2c27ec1a3bc66ba0cdbcca0f680b2302d50 +size 384 diff --git a/margin_logs/step_0000254.npy b/margin_logs/step_0000254.npy new file mode 100644 index 0000000..6276867 --- /dev/null +++ b/margin_logs/step_0000254.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d84aa89343981734baa26b1f966f3e7a53d3593dcd40bb50f94b5bb87c036db +size 384 diff --git a/margin_logs/step_0000255.npy b/margin_logs/step_0000255.npy new file mode 100644 index 0000000..9dee40c --- /dev/null +++ b/margin_logs/step_0000255.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558048f94db9c11eef27a8c4c1229f6ab3ca969c78e8d8c1380be8c727a9c0aa +size 384 diff --git a/margin_logs/step_0000256.npy b/margin_logs/step_0000256.npy new file mode 100644 index 0000000..2ea2025 --- /dev/null +++ b/margin_logs/step_0000256.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3adc103030a41733c949429e64f308793101cca50cc09055fa1e1c8feb9d9f3a +size 384 diff --git a/margin_logs/step_0000257.npy b/margin_logs/step_0000257.npy new file mode 100644 index 0000000..dbcfc21 --- /dev/null +++ b/margin_logs/step_0000257.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11fd451c29504dd7796255363e5508767d9e3c8f6cd6973f4df3f89282f82d6e +size 384 diff --git a/margin_logs/step_0000258.npy b/margin_logs/step_0000258.npy new file mode 100644 index 0000000..de6d789 --- /dev/null +++ b/margin_logs/step_0000258.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10abcee1befec1dbdf3000a11b7a445bc9fb87fcf52d631c00ddf8df7ace297d +size 384 diff --git a/margin_logs/step_0000259.npy b/margin_logs/step_0000259.npy new file mode 100644 index 0000000..c632691 --- /dev/null +++ b/margin_logs/step_0000259.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3aa95e4edda2c561a0fd740a3828919b16b4c0f305f4b7b4b7c1100f9e347a4 +size 384 diff --git a/margin_logs/step_0000260.npy b/margin_logs/step_0000260.npy new file mode 100644 index 0000000..a9294e2 --- /dev/null +++ b/margin_logs/step_0000260.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe41099d4094cf6f0607dcadf5103b1c7b4b9d6e9bb2f0c3fa298e9e3e3c7a5 +size 384 diff --git a/margin_logs/step_0000261.npy b/margin_logs/step_0000261.npy new file mode 100644 index 0000000..b0986ca --- /dev/null +++ b/margin_logs/step_0000261.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6857644bb5e7f8deccd030cfa33e008708ff534546fe14328d5bb318d8a1f7 +size 384 diff --git a/margin_logs/step_0000262.npy b/margin_logs/step_0000262.npy new file mode 100644 index 0000000..0454942 --- /dev/null +++ b/margin_logs/step_0000262.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0c772dff000fe4149a8526dbc4844e283c4568270385b5d22782f5c3cf7527 +size 384 diff --git a/margin_logs/step_0000263.npy b/margin_logs/step_0000263.npy new file mode 100644 index 0000000..d0f90e7 --- /dev/null +++ b/margin_logs/step_0000263.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2a8675594e7ae590940a616ab46f1779287bfe1564014a48ab035f831520c3 +size 384 diff --git a/margin_logs/step_0000264.npy b/margin_logs/step_0000264.npy new file mode 100644 index 0000000..0707b1b --- /dev/null +++ b/margin_logs/step_0000264.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c482ea7c1ed7dce8f5585ad26e23406d10d0ffea2f1c24b31ac117755244360 +size 384 diff --git a/margin_logs/step_0000265.npy b/margin_logs/step_0000265.npy new file mode 100644 index 0000000..0c07570 --- /dev/null +++ b/margin_logs/step_0000265.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373a655eebad2b0cbeec94d02cb3b7e93d1d67bbc016437ad5fd97138c4b62b0 +size 384 diff --git a/margin_logs/step_0000266.npy b/margin_logs/step_0000266.npy new file mode 100644 index 0000000..4b8d660 --- /dev/null +++ b/margin_logs/step_0000266.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3185785815dfc22542b281b95b5cb8a9c070b58a672475e85809d2f12edcc42 +size 384 diff --git a/margin_logs/step_0000267.npy b/margin_logs/step_0000267.npy new file mode 100644 index 0000000..1843cb4 --- /dev/null +++ b/margin_logs/step_0000267.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9434b86f7336b603ed243d3536eb6c10855c90e3aab93558bc22b482641a7179 +size 384 diff --git a/margin_logs/step_0000268.npy b/margin_logs/step_0000268.npy new file mode 100644 index 0000000..36a47b4 --- /dev/null +++ b/margin_logs/step_0000268.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5055756f2b13e50102455d2c7bae49a9c869c1767c9df1720240a584070e5a2 +size 384 diff --git a/margin_logs/step_0000269.npy b/margin_logs/step_0000269.npy new file mode 100644 index 0000000..0f4a268 --- /dev/null +++ b/margin_logs/step_0000269.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b6eb0334556ae91f63c33e3b1613fd1d590afccdb1cb70ba014560f33c67bb +size 384 diff --git a/margin_logs/step_0000270.npy b/margin_logs/step_0000270.npy new file mode 100644 index 0000000..ade24c7 --- /dev/null +++ b/margin_logs/step_0000270.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0929e02e4e2123851892c5bae6c672dbc888d8d29fa0d2b63751929a61ce0f4f +size 384 diff --git a/margin_logs/step_0000271.npy b/margin_logs/step_0000271.npy new file mode 100644 index 0000000..db72f00 --- /dev/null +++ b/margin_logs/step_0000271.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b52b68d71e94fb856331bd9761c26f4ad01def58725f2e621fce9ed2dae6d4 +size 384 diff --git a/margin_logs/step_0000272.npy b/margin_logs/step_0000272.npy new file mode 100644 index 0000000..010e1d8 --- /dev/null +++ b/margin_logs/step_0000272.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc1e90a65587554b9c68d9603908b6d247549c08f24b0c89443b682227dcb769 +size 384 diff --git a/margin_logs/step_0000273.npy b/margin_logs/step_0000273.npy new file mode 100644 index 0000000..f6b5af4 --- /dev/null +++ b/margin_logs/step_0000273.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc99090703c567662ad4c925c8854d1ae104bf551bd0b050b7c6d249ba1cd64 +size 384 diff --git a/margin_logs/step_0000274.npy b/margin_logs/step_0000274.npy new file mode 100644 index 0000000..49a4b4c --- /dev/null +++ b/margin_logs/step_0000274.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049c5ed8edc99078284b0373ba282cab09a8c18035af02d9d5f71b803179e22d +size 384 diff --git a/margin_logs/step_0000275.npy b/margin_logs/step_0000275.npy new file mode 100644 index 0000000..7a80867 --- /dev/null +++ b/margin_logs/step_0000275.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd79b20279353c9dadbbc64e84efe816eb9bcfdf6137ce8c1226201c160a43f4 +size 384 diff --git a/margin_logs/step_0000276.npy b/margin_logs/step_0000276.npy new file mode 100644 index 0000000..036a87c --- /dev/null +++ b/margin_logs/step_0000276.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37dd7a0ec7149cbe2baf155cde15d7faf620936e55483b5e244293b3dd371282 +size 384 diff --git a/margin_logs/step_0000277.npy b/margin_logs/step_0000277.npy new file mode 100644 index 0000000..46f74bb --- /dev/null +++ b/margin_logs/step_0000277.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d246b4c85f5038b9b63c32574fe202c3f5c512ef3c30589dd2fa8687d97d06 +size 384 diff --git a/margin_logs/step_0000278.npy b/margin_logs/step_0000278.npy new file mode 100644 index 0000000..80f0245 --- /dev/null +++ b/margin_logs/step_0000278.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084412f5c149e9e54e48e8272893563705ab3ff748faf757cb1a57bee8c8cddc +size 384 diff --git a/margin_logs/step_0000279.npy b/margin_logs/step_0000279.npy new file mode 100644 index 0000000..836dedf --- /dev/null +++ b/margin_logs/step_0000279.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6df2f0abafaf506f68933d9394b7f88dfd2c4e7423236b674901788d9ec2342 +size 384 diff --git a/margin_logs/step_0000280.npy b/margin_logs/step_0000280.npy new file mode 100644 index 0000000..d9955f2 --- /dev/null +++ b/margin_logs/step_0000280.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ff8a8e6bc8a7a13bed2ca6a6ab263dd375a2bceb48818d9da2d8fbf47f5b0a +size 384 diff --git a/margin_logs/step_0000281.npy b/margin_logs/step_0000281.npy new file mode 100644 index 0000000..30eedac --- /dev/null +++ b/margin_logs/step_0000281.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f24b91c4a302e0c9950e64d18fcdb6eb857f4270dfce30c7223ff687070ebd +size 384 diff --git a/margin_logs/step_0000282.npy b/margin_logs/step_0000282.npy new file mode 100644 index 0000000..8e5505f --- /dev/null +++ b/margin_logs/step_0000282.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1fbb7a7973779e8f63eea8eba0038f57ecfeca845ba713a67a58719a3dca3c0 +size 384 diff --git a/margin_logs/step_0000283.npy b/margin_logs/step_0000283.npy new file mode 100644 index 0000000..062749d --- /dev/null +++ b/margin_logs/step_0000283.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1972ccaa2f51f8e0cc4a4d4f0f4b11870238dd4fe2cda5a3a8fb14edfdd357de +size 384 diff --git a/margin_logs/step_0000284.npy b/margin_logs/step_0000284.npy new file mode 100644 index 0000000..eb479d0 --- /dev/null +++ b/margin_logs/step_0000284.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217c03f7d82ecfe636aef74d7a2131982775a7e75d754b7fb030ed0bdc71cf52 +size 384 diff --git a/margin_logs/step_0000285.npy b/margin_logs/step_0000285.npy new file mode 100644 index 0000000..fa7f6e4 --- /dev/null +++ b/margin_logs/step_0000285.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b463c2e2f96d1c9ea7e193a22e964139cdcb060d98e0b39988a93d3fc1c8aa46 +size 384 diff --git a/margin_logs/step_0000286.npy b/margin_logs/step_0000286.npy new file mode 100644 index 0000000..f35082c --- /dev/null +++ b/margin_logs/step_0000286.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a12d016a8b9c0181643bc7f9ddfde0670c3dd070d6a9fa155087ef1cc7d3bcc +size 384 diff --git a/margin_logs/step_0000287.npy b/margin_logs/step_0000287.npy new file mode 100644 index 0000000..e8a7ffd --- /dev/null +++ b/margin_logs/step_0000287.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c6b61e1d297192bf1582733945e45cb47212271e2b893e8ab238f3e8fa94b2 +size 384 diff --git a/margin_logs/step_0000288.npy b/margin_logs/step_0000288.npy new file mode 100644 index 0000000..5caafca --- /dev/null +++ b/margin_logs/step_0000288.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80700a57a5d1881299fbffb0e5aab5f2965f105fee23452ffce2fa0e0ec79271 +size 384 diff --git a/margin_logs/step_0000289.npy b/margin_logs/step_0000289.npy new file mode 100644 index 0000000..e903c08 --- /dev/null +++ b/margin_logs/step_0000289.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcd3f1b904e38ef6eae6124926206b2425f2f20270149263bc4740911f00787 +size 384 diff --git a/margin_logs/step_0000290.npy b/margin_logs/step_0000290.npy new file mode 100644 index 0000000..26ecd86 --- /dev/null +++ b/margin_logs/step_0000290.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aab6e25e41f7d51bb4dadf2487ff1961ceabcdfa36edb7c3f2001226f035936 +size 384 diff --git a/margin_logs/step_0000291.npy b/margin_logs/step_0000291.npy new file mode 100644 index 0000000..bc613e9 --- /dev/null +++ b/margin_logs/step_0000291.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2aeadf4fa85906923607683b29e061a5323990aa9da8557e60146618a2c5a6 +size 384 diff --git a/margin_logs/step_0000292.npy b/margin_logs/step_0000292.npy new file mode 100644 index 0000000..c30f774 --- /dev/null +++ b/margin_logs/step_0000292.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c90019c4f9378655fe151af08e1002d59a82f7faff52cef5f561f29fce23cf +size 384 diff --git a/margin_logs/step_0000293.npy b/margin_logs/step_0000293.npy new file mode 100644 index 0000000..3295e2e --- /dev/null +++ b/margin_logs/step_0000293.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b8a7436dffc442c254137f55b07eb8cd0c8a98df17bb6d195c2036ac57e1cbf +size 384 diff --git a/margin_logs/step_0000294.npy b/margin_logs/step_0000294.npy new file mode 100644 index 0000000..712649c --- /dev/null +++ b/margin_logs/step_0000294.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aaee606448f0ff33670b1f83cbfd4409019c2fb183de27fa3f27214b8c17d86 +size 384 diff --git a/margin_logs/step_0000295.npy b/margin_logs/step_0000295.npy new file mode 100644 index 0000000..0c1b4aa --- /dev/null +++ b/margin_logs/step_0000295.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82377681891822412e5f512e0941e8687a342ff5f6f204d544ac02badd56119b +size 384 diff --git a/margin_logs/step_0000296.npy b/margin_logs/step_0000296.npy new file mode 100644 index 0000000..b397d91 --- /dev/null +++ b/margin_logs/step_0000296.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf2fc76b96d4641ddae42654fc643a196898afbaaf5cac679a792f9eb37659c +size 384 diff --git a/margin_logs/step_0000297.npy b/margin_logs/step_0000297.npy new file mode 100644 index 0000000..51604eb --- /dev/null +++ b/margin_logs/step_0000297.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d300abfb1cf7efb3c13b6950d647d2c753a480b7f695fcd76830b63c5499404 +size 384 diff --git a/margin_logs/step_0000298.npy b/margin_logs/step_0000298.npy new file mode 100644 index 0000000..c262610 --- /dev/null +++ b/margin_logs/step_0000298.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775441d8906a136ef3412a4f5e6d124fbc1b8510085b4e3a7fd2af7b3e305f4f +size 384 diff --git a/margin_logs/step_0000299.npy b/margin_logs/step_0000299.npy new file mode 100644 index 0000000..369b038 --- /dev/null +++ b/margin_logs/step_0000299.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2ee5fb66ccfa4af4a62bf431f2410c222a775193561268ac21461c7a3a50dd +size 384 diff --git a/margin_logs/step_0000300.npy b/margin_logs/step_0000300.npy new file mode 100644 index 0000000..69da200 --- /dev/null +++ b/margin_logs/step_0000300.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c78d2809fc58dd36fc1b57eb772e839dc85c323bff46e39a807640bf33c2bd +size 384 diff --git a/margin_logs/step_0000301.npy b/margin_logs/step_0000301.npy new file mode 100644 index 0000000..6a25ccc --- /dev/null +++ b/margin_logs/step_0000301.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fd131fd8718b1153b4ac2abd12dc9e970ceb1e28a61bab5d546ad2e5ce8e235 +size 384 diff --git a/margin_logs/step_0000302.npy b/margin_logs/step_0000302.npy new file mode 100644 index 0000000..8e450ce --- /dev/null +++ b/margin_logs/step_0000302.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ae1069a8e17ba2d0c763e47b683bf006be7e914d358b3b5b27bbc9de9012b9 +size 384 diff --git a/margin_logs/step_0000303.npy b/margin_logs/step_0000303.npy new file mode 100644 index 0000000..94b354f --- /dev/null +++ b/margin_logs/step_0000303.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4133ddf10817380dcb8c069daf5615bb6a1774673e68c3ee7c2757930f0b37 +size 384 diff --git a/margin_logs/step_0000304.npy b/margin_logs/step_0000304.npy new file mode 100644 index 0000000..d72ccaf --- /dev/null +++ b/margin_logs/step_0000304.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:690d90c8b74f9a0471b58b38a5ef88c58a7656b88720a0b742d152684f5e07df +size 384 diff --git a/margin_logs/step_0000305.npy b/margin_logs/step_0000305.npy new file mode 100644 index 0000000..6066bd6 --- /dev/null +++ b/margin_logs/step_0000305.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ac520092ae3686df069f3afa223b0ad09ec07c84aa25632aeca1a89fe5cc9a +size 384 diff --git a/margin_logs/step_0000306.npy b/margin_logs/step_0000306.npy new file mode 100644 index 0000000..dd718ab --- /dev/null +++ b/margin_logs/step_0000306.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb1d468b1c450c3593f78aab98469b05a6f0cb4eef7acf47cae81642a662415 +size 384 diff --git a/margin_logs/step_0000307.npy b/margin_logs/step_0000307.npy new file mode 100644 index 0000000..f0e1dcd --- /dev/null +++ b/margin_logs/step_0000307.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b69a816e8fe4fe3007db9f20d709b4a0346b54fcdbb970d0ec955a9668fc7748 +size 384 diff --git a/margin_logs/step_0000308.npy b/margin_logs/step_0000308.npy new file mode 100644 index 0000000..eff6820 --- /dev/null +++ b/margin_logs/step_0000308.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83579db0b45b596585e113f1eefc7f620c0569f4008e9495a97bf3441b209d9d +size 384 diff --git a/margin_logs/step_0000309.npy b/margin_logs/step_0000309.npy new file mode 100644 index 0000000..8ba1852 --- /dev/null +++ b/margin_logs/step_0000309.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3505c80ec6e8d6ac384a133f2a7c28b9603c5479920e13a254772aa4c683c621 +size 384 diff --git a/margin_logs/step_0000310.npy b/margin_logs/step_0000310.npy new file mode 100644 index 0000000..ae7ff60 --- /dev/null +++ b/margin_logs/step_0000310.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c00a491363f99eb527f9bc4114dc082892c3ac59a7f01494d65ea58a72ab98 +size 384 diff --git a/margin_logs/step_0000311.npy b/margin_logs/step_0000311.npy new file mode 100644 index 0000000..fcbe4eb --- /dev/null +++ b/margin_logs/step_0000311.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab68756eac1e373ed58e4a41e5925a2fd1a887ca7e9dc1e206f14a1e97daa20 +size 384 diff --git a/margin_logs/step_0000312.npy b/margin_logs/step_0000312.npy new file mode 100644 index 0000000..d82f83d --- /dev/null +++ b/margin_logs/step_0000312.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d91df67264061dcfe6e1ddb5dbd4c79c4853b53b7b9327deae3f2255c2671b +size 384 diff --git a/margin_logs/step_0000313.npy b/margin_logs/step_0000313.npy new file mode 100644 index 0000000..6061be6 --- /dev/null +++ b/margin_logs/step_0000313.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b97c0e8ef71e79287717bd7690026f53f25df228ca63e907ef8805884b71e119 +size 384 diff --git a/margin_logs/step_0000314.npy b/margin_logs/step_0000314.npy new file mode 100644 index 0000000..77eec7e --- /dev/null +++ b/margin_logs/step_0000314.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4a58f2690a73e931d1c36f83689b5a583764432df27fd5e8c9d70294657727 +size 384 diff --git a/margin_logs/step_0000315.npy b/margin_logs/step_0000315.npy new file mode 100644 index 0000000..da9a5c4 --- /dev/null +++ b/margin_logs/step_0000315.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6a64a62f8753869404aa7cae12f8c64d708b701c9dfafb75f18fcb1becf25cb +size 384 diff --git a/margin_logs/step_0000316.npy b/margin_logs/step_0000316.npy new file mode 100644 index 0000000..4ead71b --- /dev/null +++ b/margin_logs/step_0000316.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37abecd0ad01c89e6ea73fb6fa4cd9f8de57f89618bd3d7e2ca67957f9f6db9 +size 384 diff --git a/margin_logs/step_0000317.npy b/margin_logs/step_0000317.npy new file mode 100644 index 0000000..c15e6ab --- /dev/null +++ b/margin_logs/step_0000317.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08ddea775126c9c91110806c11d971d631bc417b67e7b8a12c2c3cb720f5c36 +size 384 diff --git a/margin_logs/step_0000318.npy b/margin_logs/step_0000318.npy new file mode 100644 index 0000000..a39d96f --- /dev/null +++ b/margin_logs/step_0000318.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea679677d3124a22e1bf79160eec9bafa85022aba4d35df60af26c18957bdb6a +size 384 diff --git a/margin_logs/step_0000319.npy b/margin_logs/step_0000319.npy new file mode 100644 index 0000000..229cbd5 --- /dev/null +++ b/margin_logs/step_0000319.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b44334c7bb47e03b53969f4ec331ea06e3f2f214f972d006443d8e5a9fe1f7 +size 384 diff --git a/margin_logs/step_0000320.npy b/margin_logs/step_0000320.npy new file mode 100644 index 0000000..9c94710 --- /dev/null +++ b/margin_logs/step_0000320.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9856697de656735704354ed8f2100bd0055332895a4404fe170cfe0a14800692 +size 384 diff --git a/margin_logs/step_0000321.npy b/margin_logs/step_0000321.npy new file mode 100644 index 0000000..2886b50 --- /dev/null +++ b/margin_logs/step_0000321.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5676bc7dc55134d693f955c5c6da7610fc83cc2405933cb9f92f0d2ad3d472b0 +size 384 diff --git a/margin_logs/step_0000322.npy b/margin_logs/step_0000322.npy new file mode 100644 index 0000000..4fb8c59 --- /dev/null +++ b/margin_logs/step_0000322.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b235cece5e91bdb118596ae3bf700e5fd9dd1eb1614da7b6114445903e6e9cc +size 384 diff --git a/margin_logs/step_0000323.npy b/margin_logs/step_0000323.npy new file mode 100644 index 0000000..d57de54 --- /dev/null +++ b/margin_logs/step_0000323.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d1008e9f815d5aef89765fbb260c48a1111a71960cefc74b228ce4895bcd4c +size 384 diff --git a/margin_logs/step_0000324.npy b/margin_logs/step_0000324.npy new file mode 100644 index 0000000..976e3c6 --- /dev/null +++ b/margin_logs/step_0000324.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c9e4c25a475ebd73e857c0aea14db29f9d378ebf5db7024cd9e6e1bdee96b48 +size 384 diff --git a/margin_logs/step_0000325.npy b/margin_logs/step_0000325.npy new file mode 100644 index 0000000..5d6cb1d --- /dev/null +++ b/margin_logs/step_0000325.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbea648d51a9896d06dbff0c8765c26b8e754eb2ef8e873fac2b0d83c3149ba8 +size 384 diff --git a/margin_logs/step_0000326.npy b/margin_logs/step_0000326.npy new file mode 100644 index 0000000..a282872 --- /dev/null +++ b/margin_logs/step_0000326.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83de02f3d91044cc19c3a13286f25610d5e4c5a80ef013934cbabe08e4a8cea0 +size 384 diff --git a/margin_logs/step_0000327.npy b/margin_logs/step_0000327.npy new file mode 100644 index 0000000..fcc2916 --- /dev/null +++ b/margin_logs/step_0000327.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7aa68e3eb91a4f94c3c032c091a9218e277512988cf5f34f812d5fe9f06a7f0 +size 384 diff --git a/margin_logs/step_0000328.npy b/margin_logs/step_0000328.npy new file mode 100644 index 0000000..52f1b85 --- /dev/null +++ b/margin_logs/step_0000328.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc954012dd8546ea782fdc6a2ce1a9db67f589722966fa3bd3a98712ae161e1f +size 384 diff --git a/margin_logs/step_0000329.npy b/margin_logs/step_0000329.npy new file mode 100644 index 0000000..4ab2dad --- /dev/null +++ b/margin_logs/step_0000329.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ceb645d37e95884ddd315de7bcde350296bf9f61e2d07520bcdc373ebb80ec +size 384 diff --git a/margin_logs/step_0000330.npy b/margin_logs/step_0000330.npy new file mode 100644 index 0000000..bb1e20d --- /dev/null +++ b/margin_logs/step_0000330.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ee424e630bc90ad251fcc49bd7aac017a955145c14ea09083aa5b32f75c367 +size 384 diff --git a/margin_logs/step_0000331.npy b/margin_logs/step_0000331.npy new file mode 100644 index 0000000..7a04827 --- /dev/null +++ b/margin_logs/step_0000331.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ddbd5ca93b2bb04aa319685412c469663143726aeefda5880f652a91fa792e2 +size 384 diff --git a/margin_logs/step_0000332.npy b/margin_logs/step_0000332.npy new file mode 100644 index 0000000..f5dbac5 --- /dev/null +++ b/margin_logs/step_0000332.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16a8a038fcbca78d2a0c85cace308e3b63d493ffc40babb25f03d6c0561d40d +size 384 diff --git a/margin_logs/step_0000333.npy b/margin_logs/step_0000333.npy new file mode 100644 index 0000000..078e92f --- /dev/null +++ b/margin_logs/step_0000333.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83d79d57b3cdbf5a821817476b2c9e5f2b304b804119947cbf7404495bf1d32 +size 384 diff --git a/margin_logs/step_0000334.npy b/margin_logs/step_0000334.npy new file mode 100644 index 0000000..2fb35f6 --- /dev/null +++ b/margin_logs/step_0000334.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd70d9a9c59128db989720b9be0e2241d7ca58d953d5170e2c6480e428a9f43f +size 384 diff --git a/margin_logs/step_0000335.npy b/margin_logs/step_0000335.npy new file mode 100644 index 0000000..24d513e --- /dev/null +++ b/margin_logs/step_0000335.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aae818e08819728f2d7307982f0e90c8153880a62415275f88149bf4fb9bee7 +size 384 diff --git a/margin_logs/step_0000336.npy b/margin_logs/step_0000336.npy new file mode 100644 index 0000000..f46b4c7 --- /dev/null +++ b/margin_logs/step_0000336.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aae5f7b9edac382537ab823f3b637a0ca6ad73dde632644ac326abeb1512e10 +size 384 diff --git a/margin_logs/step_0000337.npy b/margin_logs/step_0000337.npy new file mode 100644 index 0000000..d4ca59b --- /dev/null +++ b/margin_logs/step_0000337.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f90536be1ba542a8c2dca8db41112d5c5855cb05a56ad6b18280bbe69ec8f61 +size 384 diff --git a/margin_logs/step_0000338.npy b/margin_logs/step_0000338.npy new file mode 100644 index 0000000..d4e1122 --- /dev/null +++ b/margin_logs/step_0000338.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9337fda209db92a737527f24af55f92979a32bf322e4fca9a79ea1192ffe80 +size 384 diff --git a/margin_logs/step_0000339.npy b/margin_logs/step_0000339.npy new file mode 100644 index 0000000..2caef15 --- /dev/null +++ b/margin_logs/step_0000339.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208344e61ebe726686a288948be455aec5532cfc7d281066dabb33feec137280 +size 384 diff --git a/margin_logs/step_0000340.npy b/margin_logs/step_0000340.npy new file mode 100644 index 0000000..a00b8c9 --- /dev/null +++ b/margin_logs/step_0000340.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6734410084765cd65e30a86605d904c6f6ad83f6b8feb805faacf8528e5b2309 +size 384 diff --git a/margin_logs/step_0000341.npy b/margin_logs/step_0000341.npy new file mode 100644 index 0000000..086ca32 --- /dev/null +++ b/margin_logs/step_0000341.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe62b6a660bdd8ab525299690a33940f16a28db21d844056cefe2b3e297b0268 +size 384 diff --git a/margin_logs/step_0000342.npy b/margin_logs/step_0000342.npy new file mode 100644 index 0000000..a36d396 --- /dev/null +++ b/margin_logs/step_0000342.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d146041211e591355d16393f8e370a2af71098fef451bb708d13b1bfb45f60dd +size 384 diff --git a/margin_logs/step_0000343.npy b/margin_logs/step_0000343.npy new file mode 100644 index 0000000..6650836 --- /dev/null +++ b/margin_logs/step_0000343.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:770306ea44d6a9d4008bc0d4a657de27b19f6bf1fd6bb171f5f818e45684d772 +size 384 diff --git a/margin_logs/step_0000344.npy b/margin_logs/step_0000344.npy new file mode 100644 index 0000000..99c8f4b --- /dev/null +++ b/margin_logs/step_0000344.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388c81112402530e80c512bc5f2f8fa1c37a21b628c08635d79861ba9a3624f2 +size 384 diff --git a/margin_logs/step_0000345.npy b/margin_logs/step_0000345.npy new file mode 100644 index 0000000..dcbebc0 --- /dev/null +++ b/margin_logs/step_0000345.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab05279afd0d202268870937a733f20931e33869a841bd5a57e9cb597a1e14dd +size 384 diff --git a/margin_logs/step_0000346.npy b/margin_logs/step_0000346.npy new file mode 100644 index 0000000..f4bf695 --- /dev/null +++ b/margin_logs/step_0000346.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70516220e7d210fd84e872600269dd76630a2588ca619288aefa00b7ef88d54e +size 384 diff --git a/margin_logs/step_0000347.npy b/margin_logs/step_0000347.npy new file mode 100644 index 0000000..17031b7 --- /dev/null +++ b/margin_logs/step_0000347.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a90732d5b9fdb36493ca3bbd60541256d2ab23f28b32b7e858990e4118b8458 +size 384 diff --git a/margin_logs/step_0000348.npy b/margin_logs/step_0000348.npy new file mode 100644 index 0000000..9ee25df --- /dev/null +++ b/margin_logs/step_0000348.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2cd50a51c17ac02cb8ad328820195a265a6f1345b6b8e522a57e9ffa4dbeaf +size 384 diff --git a/margin_logs/step_0000349.npy b/margin_logs/step_0000349.npy new file mode 100644 index 0000000..5b64974 --- /dev/null +++ b/margin_logs/step_0000349.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c92ec8a46e1e38233b5d641013852ae6fac0f98ddad6b321570cf9978a09164 +size 384 diff --git a/margin_logs/step_0000350.npy b/margin_logs/step_0000350.npy new file mode 100644 index 0000000..00e5694 --- /dev/null +++ b/margin_logs/step_0000350.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252ec67aa0f7e5ac477176682cf52190e436a013ac3af992acd5a2fd670098a0 +size 384 diff --git a/margin_logs/step_0000351.npy b/margin_logs/step_0000351.npy new file mode 100644 index 0000000..68ad169 --- /dev/null +++ b/margin_logs/step_0000351.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:761f9f7584e6e80c6f49fe133fbd8956ace237ece5174225b804131cb6f81142 +size 384 diff --git a/margin_logs/step_0000352.npy b/margin_logs/step_0000352.npy new file mode 100644 index 0000000..d652de7 --- /dev/null +++ b/margin_logs/step_0000352.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b6ac1de10a4053befaa867ebb962be1ac564d00b83bbed154eff33e84bdddb2 +size 384 diff --git a/margin_logs/step_0000353.npy b/margin_logs/step_0000353.npy new file mode 100644 index 0000000..07fb8cb --- /dev/null +++ b/margin_logs/step_0000353.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1405cdfb3f842dd016d59d28da55579eb09bc8af2d1015a5d2028d696be41e +size 384 diff --git a/margin_logs/step_0000354.npy b/margin_logs/step_0000354.npy new file mode 100644 index 0000000..8f4dadd --- /dev/null +++ b/margin_logs/step_0000354.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a824885af080c16748c5c60bddc342391e9489d0cb1dd626c531616feb87f4a8 +size 384 diff --git a/margin_logs/step_0000355.npy b/margin_logs/step_0000355.npy new file mode 100644 index 0000000..d832cc0 --- /dev/null +++ b/margin_logs/step_0000355.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0836bb4741ac450de6f8520ba3fcd28e5418ff1b64f886c8a6904b75752099 +size 384 diff --git a/margin_logs/step_0000356.npy b/margin_logs/step_0000356.npy new file mode 100644 index 0000000..36470d6 --- /dev/null +++ b/margin_logs/step_0000356.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade8bea789919a57d324e73fab9bbf94ee41e1b20aa11dc159f91f087228abde +size 384 diff --git a/margin_logs/step_0000357.npy b/margin_logs/step_0000357.npy new file mode 100644 index 0000000..ef2c3d0 --- /dev/null +++ b/margin_logs/step_0000357.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55c95e78f7a990ef789ec7dbcb3d6c2a16ed298a46a61fbba212c1b74827732 +size 384 diff --git a/margin_logs/step_0000358.npy b/margin_logs/step_0000358.npy new file mode 100644 index 0000000..8cce58d --- /dev/null +++ b/margin_logs/step_0000358.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e6f553c563de5185131d04e1fc862c738b1a68742a099f9573b055d59b47a88 +size 384 diff --git a/margin_logs/step_0000359.npy b/margin_logs/step_0000359.npy new file mode 100644 index 0000000..9c29175 --- /dev/null +++ b/margin_logs/step_0000359.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7a4a298d3cfb24c2d947cd2d27b35bc87223a9e1bc77528289c12bd7e58319 +size 384 diff --git a/margin_logs/step_0000360.npy b/margin_logs/step_0000360.npy new file mode 100644 index 0000000..a21d8d2 --- /dev/null +++ b/margin_logs/step_0000360.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1acf154482567954a95b83f1058ccff3cdd684335285eb95cf8c58244529699a +size 384 diff --git a/margin_logs/step_0000361.npy b/margin_logs/step_0000361.npy new file mode 100644 index 0000000..bea4b42 --- /dev/null +++ b/margin_logs/step_0000361.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f27435f5a449a51811a2838941766a7ae7e3649b21bd49310e7b95c4502165 +size 384 diff --git a/margin_logs/step_0000362.npy b/margin_logs/step_0000362.npy new file mode 100644 index 0000000..a4584d5 --- /dev/null +++ b/margin_logs/step_0000362.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a757e7a0ea8b8404228738269043ef3ae05da83e74eaa85d54aa25da21d62b +size 384 diff --git a/margin_logs/step_0000363.npy b/margin_logs/step_0000363.npy new file mode 100644 index 0000000..2df70d1 --- /dev/null +++ b/margin_logs/step_0000363.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e2ffa02432199905e68b34d78dd208cfa83c53e1aaafed828049f9651115392 +size 384 diff --git a/margin_logs/step_0000364.npy b/margin_logs/step_0000364.npy new file mode 100644 index 0000000..c04d622 --- /dev/null +++ b/margin_logs/step_0000364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9745c030f2c28f07c976c7d72300bf02ccdff76e817adde11b994f2f7e8a059e +size 384 diff --git a/margin_logs/step_0000365.npy b/margin_logs/step_0000365.npy new file mode 100644 index 0000000..527698f --- /dev/null +++ b/margin_logs/step_0000365.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138de56ca26f89d5f6776a5465efcb9f345b64b9370d4130a8fd6e5a60bb9fb0 +size 384 diff --git a/margin_logs/step_0000366.npy b/margin_logs/step_0000366.npy new file mode 100644 index 0000000..ece2a91 --- /dev/null +++ b/margin_logs/step_0000366.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:879f0e1b9df41ac86b4185c66d41804139486f7df5eb2ed02d26762fa3a90070 +size 384 diff --git a/margin_logs/step_0000367.npy b/margin_logs/step_0000367.npy new file mode 100644 index 0000000..8ba4fad --- /dev/null +++ b/margin_logs/step_0000367.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83fcd2605c4b14f5337f91043e3b76fda871be2596d0b0bb679c37028a519db5 +size 384 diff --git a/margin_logs/step_0000368.npy b/margin_logs/step_0000368.npy new file mode 100644 index 0000000..b250593 --- /dev/null +++ b/margin_logs/step_0000368.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9646666481d4d914800e0bef768a401bcf30dd0ff54a788152892e9c7049bfd +size 384 diff --git a/margin_logs/step_0000369.npy b/margin_logs/step_0000369.npy new file mode 100644 index 0000000..03d4088 --- /dev/null +++ b/margin_logs/step_0000369.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8078db149c52c0f411a8040299c4c6169c707e83bb25a93173938096e21de519 +size 384 diff --git a/margin_logs/step_0000370.npy b/margin_logs/step_0000370.npy new file mode 100644 index 0000000..1b948f7 --- /dev/null +++ b/margin_logs/step_0000370.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f478490cb2adc910920cda8d66960230f60570b9a8864595a1447485718bb160 +size 384 diff --git a/margin_logs/step_0000371.npy b/margin_logs/step_0000371.npy new file mode 100644 index 0000000..de43bfc --- /dev/null +++ b/margin_logs/step_0000371.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59946ced0d20ca7b12e86d13b71da958eec18f5f77727934199659c141ffd1a4 +size 384 diff --git a/margin_logs/step_0000372.npy b/margin_logs/step_0000372.npy new file mode 100644 index 0000000..9234bd1 --- /dev/null +++ b/margin_logs/step_0000372.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c107d0980078b50dc137f847f10048ab29acc5a6a7517db471fb197d1bfeac51 +size 384 diff --git a/margin_logs/step_0000373.npy b/margin_logs/step_0000373.npy new file mode 100644 index 0000000..dc1de08 --- /dev/null +++ b/margin_logs/step_0000373.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ee15feea586ac045051f0fb8c5c55ddb4996a35a8580cce146d6a828558968 +size 384 diff --git a/margin_logs/step_0000374.npy b/margin_logs/step_0000374.npy new file mode 100644 index 0000000..ab3184f --- /dev/null +++ b/margin_logs/step_0000374.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e54c34882c0dae9cc8d9e1861a3eb336a4093e9a5f3adac1d7697b4a10df42d +size 384 diff --git a/margin_logs/step_0000375.npy b/margin_logs/step_0000375.npy new file mode 100644 index 0000000..934ba0e --- /dev/null +++ b/margin_logs/step_0000375.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d962f785911d33daa3e9ed058cc5fa1cbaf8c103f25a3325082589bb67661f3 +size 384 diff --git a/margin_logs/step_0000376.npy b/margin_logs/step_0000376.npy new file mode 100644 index 0000000..6a52960 --- /dev/null +++ b/margin_logs/step_0000376.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f9c99abb57580536f8097c6353632f151399e545a391c769e2712f8a66edb0 +size 384 diff --git a/margin_logs/step_0000377.npy b/margin_logs/step_0000377.npy new file mode 100644 index 0000000..4b1ed86 --- /dev/null +++ b/margin_logs/step_0000377.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35124e30db2ee4a1a4f2784e818d8cd282dbf9c8539bd57f1053702a45316acc +size 384 diff --git a/margin_logs/step_0000378.npy b/margin_logs/step_0000378.npy new file mode 100644 index 0000000..7a6a278 --- /dev/null +++ b/margin_logs/step_0000378.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f582f96e25aefc6d685b81b4bf1e95460423bfb926e6d399da006fc0a23dd723 +size 384 diff --git a/margin_logs/step_0000379.npy b/margin_logs/step_0000379.npy new file mode 100644 index 0000000..ff80d9d --- /dev/null +++ b/margin_logs/step_0000379.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06880c61c88f16ec8d44f44150745a68798198c19aa66ac2efa530558d4e9dc1 +size 384 diff --git a/margin_logs/step_0000380.npy b/margin_logs/step_0000380.npy new file mode 100644 index 0000000..7b4d852 --- /dev/null +++ b/margin_logs/step_0000380.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386bc6cfb09fccd87fa1d6219fd93ffc956f2f253e30bd3ead02074a5adbfddf +size 384 diff --git a/margin_logs/step_0000381.npy b/margin_logs/step_0000381.npy new file mode 100644 index 0000000..fae98d9 --- /dev/null +++ b/margin_logs/step_0000381.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ad98d1783af694e51fdadcd32c1952b8ddc903ba832aff258ff72aeceab39b +size 384 diff --git a/margin_logs/step_0000382.npy b/margin_logs/step_0000382.npy new file mode 100644 index 0000000..56a32c0 --- /dev/null +++ b/margin_logs/step_0000382.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df694c350402b12b2e9701d85f0126ac156670d000437cb0960ae7e268624b8 +size 384 diff --git a/margin_logs/step_0000383.npy b/margin_logs/step_0000383.npy new file mode 100644 index 0000000..bbc4c61 --- /dev/null +++ b/margin_logs/step_0000383.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5a2c48015409e5605b113a2424ff3204d3cff8a2455c2626739e7f35fe6d03 +size 384 diff --git a/margin_logs/step_0000384.npy b/margin_logs/step_0000384.npy new file mode 100644 index 0000000..106c3b4 --- /dev/null +++ b/margin_logs/step_0000384.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d2f39b315b6d949310db2940ffb18f20b742303c7017988005e39109ad94cc +size 384 diff --git a/margin_logs/step_0000385.npy b/margin_logs/step_0000385.npy new file mode 100644 index 0000000..99a2c57 --- /dev/null +++ b/margin_logs/step_0000385.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d310cfcebaa0e1ab3f04132d5e281c88329f512d4ed4f1f7f51976108b1acc6 +size 384 diff --git a/margin_logs/step_0000386.npy b/margin_logs/step_0000386.npy new file mode 100644 index 0000000..7051614 --- /dev/null +++ b/margin_logs/step_0000386.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad5d68c1457747f9e3a758b55ceca4b99fcd6c1144d29e5e5d0ecff272f56f2 +size 384 diff --git a/margin_logs/step_0000387.npy b/margin_logs/step_0000387.npy new file mode 100644 index 0000000..e1b636e --- /dev/null +++ b/margin_logs/step_0000387.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac83d7f0c67f8eef458135a02d13712a50404159b26874088e466286e3e2e7fc +size 384 diff --git a/margin_logs/step_0000388.npy b/margin_logs/step_0000388.npy new file mode 100644 index 0000000..36fc5cc --- /dev/null +++ b/margin_logs/step_0000388.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6b7c95748a0edf88837fbb6e0ca45135d24e0bad8cdb6d40d1c04f90ddfbe7e +size 384 diff --git a/margin_logs/step_0000389.npy b/margin_logs/step_0000389.npy new file mode 100644 index 0000000..8cd51f6 --- /dev/null +++ b/margin_logs/step_0000389.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253a03c1f35269a872303c6e325e1062bb5a576f41153baec075f04ffff73094 +size 384 diff --git a/margin_logs/step_0000390.npy b/margin_logs/step_0000390.npy new file mode 100644 index 0000000..2456cfb --- /dev/null +++ b/margin_logs/step_0000390.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63bbae3c582a86b52d10de7ee904d336788bf9de6dfb0b7096ff2552a4e09bf +size 384 diff --git a/margin_logs/step_0000391.npy b/margin_logs/step_0000391.npy new file mode 100644 index 0000000..820dca2 --- /dev/null +++ b/margin_logs/step_0000391.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6e22e4edf27ec3cf7d5dd22da8782a8fdb1e82fefe3fac2e9ddd3d6754c89c +size 384 diff --git a/margin_logs/step_0000392.npy b/margin_logs/step_0000392.npy new file mode 100644 index 0000000..3daa3cd --- /dev/null +++ b/margin_logs/step_0000392.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63bfd9c3e137fa0b318057010fdedbcacbdc42a9a804db95d36f64d03eeebb8 +size 384 diff --git a/margin_logs/step_0000393.npy b/margin_logs/step_0000393.npy new file mode 100644 index 0000000..f38e93d --- /dev/null +++ b/margin_logs/step_0000393.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8722f7eda5a3572b47c3987809c0fdf27a5a83ec4006c06145f6170b33c78197 +size 384 diff --git a/margin_logs/step_0000394.npy b/margin_logs/step_0000394.npy new file mode 100644 index 0000000..0c938c1 --- /dev/null +++ b/margin_logs/step_0000394.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1130488379c2fc1026bc9c08a7db6180482910b5cc8420216bf395f6154c008 +size 384 diff --git a/margin_logs/step_0000395.npy b/margin_logs/step_0000395.npy new file mode 100644 index 0000000..b87fd36 --- /dev/null +++ b/margin_logs/step_0000395.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52149ff0207d39b4b89e17ec1fe42da9df3297a6b31c71bdd0bfcf9d895dac63 +size 384 diff --git a/margin_logs/step_0000396.npy b/margin_logs/step_0000396.npy new file mode 100644 index 0000000..468bc50 --- /dev/null +++ b/margin_logs/step_0000396.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40a4d61e62b6b6c1e8659c881ab54eb98c3ed3118058898714751117f527f19 +size 384 diff --git a/margin_logs/step_0000397.npy b/margin_logs/step_0000397.npy new file mode 100644 index 0000000..c6d0421 --- /dev/null +++ b/margin_logs/step_0000397.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459f51598a550a71d8cdca6f4a3048db1c2e002839235abe2b4a1b05e45bfca3 +size 384 diff --git a/margin_logs/step_0000398.npy b/margin_logs/step_0000398.npy new file mode 100644 index 0000000..d5ceefc --- /dev/null +++ b/margin_logs/step_0000398.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23331741c3b214d766e76064546b758cbe39c119916b10032fcfa1625bf3ab81 +size 384 diff --git a/margin_logs/step_0000399.npy b/margin_logs/step_0000399.npy new file mode 100644 index 0000000..9408ff3 --- /dev/null +++ b/margin_logs/step_0000399.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9ea2d03a79c15de6df46bc62bc4a9aea92238c7f33a7494cb437caae8df230 +size 384 diff --git a/margin_logs/step_0000400.npy b/margin_logs/step_0000400.npy new file mode 100644 index 0000000..c7fd911 --- /dev/null +++ b/margin_logs/step_0000400.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aec34159a09c8dfdd889ea7fd5647713b3cec40656e8d3d04dc51b88abde7f6 +size 384 diff --git a/margin_logs/step_0000401.npy b/margin_logs/step_0000401.npy new file mode 100644 index 0000000..6ab462b --- /dev/null +++ b/margin_logs/step_0000401.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac33a29d65866e7718d5e0927a26155c1d9606b725eb082bf12d4456f9a6be1 +size 384 diff --git a/margin_logs/step_0000402.npy b/margin_logs/step_0000402.npy new file mode 100644 index 0000000..f9dd323 --- /dev/null +++ b/margin_logs/step_0000402.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0e6604157c40dcb66ec88180ffeb88bae8a3458aca12401881d0c0fb590322 +size 384 diff --git a/margin_logs/step_0000403.npy b/margin_logs/step_0000403.npy new file mode 100644 index 0000000..7eeee00 --- /dev/null +++ b/margin_logs/step_0000403.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8057f269bad14598863db22d5ac5b9bdf870c3f9e36b37e9cce5fe66220fa085 +size 384 diff --git a/margin_logs/step_0000404.npy b/margin_logs/step_0000404.npy new file mode 100644 index 0000000..784f683 --- /dev/null +++ b/margin_logs/step_0000404.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd37915275708582de869426998049c492aef93bba788f5fed3fc45a9840547b +size 384 diff --git a/margin_logs/step_0000405.npy b/margin_logs/step_0000405.npy new file mode 100644 index 0000000..6466398 --- /dev/null +++ b/margin_logs/step_0000405.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81cb4bb3e4eb03eb61e65f8edb85e262d884659c1e4f62dafba7b509ebe5f21 +size 384 diff --git a/margin_logs/step_0000406.npy b/margin_logs/step_0000406.npy new file mode 100644 index 0000000..9c650db --- /dev/null +++ b/margin_logs/step_0000406.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3840da7e978d373e384beab6eb2e7a926465bb217cf6da3c948e946ec3025920 +size 384 diff --git a/margin_logs/step_0000407.npy b/margin_logs/step_0000407.npy new file mode 100644 index 0000000..3ef162a --- /dev/null +++ b/margin_logs/step_0000407.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4cbdea87a063b87e5535125be1a4d46857004d89c4cda4efb1fd4e35e07c7a +size 384 diff --git a/margin_logs/step_0000408.npy b/margin_logs/step_0000408.npy new file mode 100644 index 0000000..ada06d9 --- /dev/null +++ b/margin_logs/step_0000408.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a86c6514eeef67d05124f51b3b9bd816d9fd09a02e57f8911ec768cc632a8bc +size 384 diff --git a/margin_logs/step_0000409.npy b/margin_logs/step_0000409.npy new file mode 100644 index 0000000..40d8a88 --- /dev/null +++ b/margin_logs/step_0000409.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f7d471a0c47f4a8bbb542f48beb9b81235dc76d909d09b90a03d8f9ba3fb62 +size 384 diff --git a/margin_logs/step_0000410.npy b/margin_logs/step_0000410.npy new file mode 100644 index 0000000..c3eec3c --- /dev/null +++ b/margin_logs/step_0000410.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ba48cb43e3f8a50d1d04c62bd6ad5c215f3db631b39d77d61627cfcb924104 +size 384 diff --git a/margin_logs/step_0000411.npy b/margin_logs/step_0000411.npy new file mode 100644 index 0000000..44dd9b5 --- /dev/null +++ b/margin_logs/step_0000411.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b6095a1e81d6d00bfeb63d92d85bd60b3d863f8c744d3af0b6de8e323ada00 +size 384 diff --git a/margin_logs/step_0000412.npy b/margin_logs/step_0000412.npy new file mode 100644 index 0000000..29fd6e7 --- /dev/null +++ b/margin_logs/step_0000412.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eab6cb559a00408a7256e01a7aaf8b4456ffb05b7f0c331f4cf097f985609b03 +size 384 diff --git a/margin_logs/step_0000413.npy b/margin_logs/step_0000413.npy new file mode 100644 index 0000000..3080fd5 --- /dev/null +++ b/margin_logs/step_0000413.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5795d1e97572f086c505109a9c62fb6a4868aad5991c9be86d094a20732d97 +size 384 diff --git a/margin_logs/step_0000414.npy b/margin_logs/step_0000414.npy new file mode 100644 index 0000000..06cf8ef --- /dev/null +++ b/margin_logs/step_0000414.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fa1b37d1b932efc5a91a5c551c133b9814c034f207ff9b4beee3dace6398ceb +size 384 diff --git a/margin_logs/step_0000415.npy b/margin_logs/step_0000415.npy new file mode 100644 index 0000000..99585a3 --- /dev/null +++ b/margin_logs/step_0000415.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abbffb9d6c8d06498b2debe43ae7a7d809f6c1033822375554e42f53b60b172e +size 384 diff --git a/margin_logs/step_0000416.npy b/margin_logs/step_0000416.npy new file mode 100644 index 0000000..35fce4e --- /dev/null +++ b/margin_logs/step_0000416.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ddd8820332c446dcd607733b06ab534b149f39e6502cc69a3c9a985ef5fecee +size 384 diff --git a/margin_logs/step_0000417.npy b/margin_logs/step_0000417.npy new file mode 100644 index 0000000..9055d23 --- /dev/null +++ b/margin_logs/step_0000417.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8681ba414225bd33ca1090d2be278aeeb99ac673ee7ba462d82f33268fb005f9 +size 384 diff --git a/margin_logs/step_0000418.npy b/margin_logs/step_0000418.npy new file mode 100644 index 0000000..714b1ce --- /dev/null +++ b/margin_logs/step_0000418.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e943c39cc05895567016d1064bd1ba2bba612d44fde16f6e93999b7663b977 +size 384 diff --git a/margin_logs/step_0000419.npy b/margin_logs/step_0000419.npy new file mode 100644 index 0000000..4666d6f --- /dev/null +++ b/margin_logs/step_0000419.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a862115bf2ba51058afc386fd20d3349b1c507e590984d062edb3244365a86e +size 384 diff --git a/margin_logs/step_0000420.npy b/margin_logs/step_0000420.npy new file mode 100644 index 0000000..d60a2ac --- /dev/null +++ b/margin_logs/step_0000420.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03fb3de6c1f5f91f65716591a4b6a2c39f0953b333964e9df644642b071eb137 +size 384 diff --git a/margin_logs/step_0000421.npy b/margin_logs/step_0000421.npy new file mode 100644 index 0000000..a49485f --- /dev/null +++ b/margin_logs/step_0000421.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:664af0756a5322f7e0e6fbb5f620caf9ac468d6b0b3dadc0b083b63959726f12 +size 384 diff --git a/margin_logs/step_0000422.npy b/margin_logs/step_0000422.npy new file mode 100644 index 0000000..a83e0ad --- /dev/null +++ b/margin_logs/step_0000422.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c184400223e11844eb149117b00bf0084fa4d07d771a2cd1f0d035c16a1aab9 +size 384 diff --git a/margin_logs/step_0000423.npy b/margin_logs/step_0000423.npy new file mode 100644 index 0000000..47fa026 --- /dev/null +++ b/margin_logs/step_0000423.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1ddef31e00aa22021a5314bfbaa498afaf75fea53cdd6dbac4eac5a92a4472 +size 384 diff --git a/margin_logs/step_0000424.npy b/margin_logs/step_0000424.npy new file mode 100644 index 0000000..fb6f3c3 --- /dev/null +++ b/margin_logs/step_0000424.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc07bf9cd3b5a426960fe87430183c0f3a7b1cbd0199ba3e639479161f4f3ec9 +size 384 diff --git a/margin_logs/step_0000425.npy b/margin_logs/step_0000425.npy new file mode 100644 index 0000000..495d023 --- /dev/null +++ b/margin_logs/step_0000425.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233659599f94a0ac9b6d9b8aa469a06678d49a0feab5aa67d76b84c4e26aecda +size 384 diff --git a/margin_logs/step_0000426.npy b/margin_logs/step_0000426.npy new file mode 100644 index 0000000..4ec6aa6 --- /dev/null +++ b/margin_logs/step_0000426.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a583cc14002658ef55b1584771c0a5179135e1c507afd284d96fa61468cfc45 +size 384 diff --git a/margin_logs/step_0000427.npy b/margin_logs/step_0000427.npy new file mode 100644 index 0000000..1d8a422 --- /dev/null +++ b/margin_logs/step_0000427.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0273b7be8846b74514e7b0e960146f4c46532d63ad02296a38e2f925bf0032 +size 384 diff --git a/margin_logs/step_0000428.npy b/margin_logs/step_0000428.npy new file mode 100644 index 0000000..d86e9c9 --- /dev/null +++ b/margin_logs/step_0000428.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3e58da19a8bde37d2d6ec7bd7694707ab012bb07834f3a12e6315a87935b3c +size 384 diff --git a/margin_logs/step_0000429.npy b/margin_logs/step_0000429.npy new file mode 100644 index 0000000..17e3cf3 --- /dev/null +++ b/margin_logs/step_0000429.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9bf55beac4a0bb8d7a6512606ae7c23a0753287cf31d0631178c02c442509e2 +size 384 diff --git a/margin_logs/step_0000430.npy b/margin_logs/step_0000430.npy new file mode 100644 index 0000000..4816685 --- /dev/null +++ b/margin_logs/step_0000430.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de14900a81bd2fdcf96e4af669129269f96f92f231078f7a9063d9f270434879 +size 384 diff --git a/margin_logs/step_0000431.npy b/margin_logs/step_0000431.npy new file mode 100644 index 0000000..4cd4af6 --- /dev/null +++ b/margin_logs/step_0000431.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245e142f19428e3fadd1003990b71b697c1d2f0df123e3d099e122d753dfd286 +size 384 diff --git a/margin_logs/step_0000432.npy b/margin_logs/step_0000432.npy new file mode 100644 index 0000000..a70d604 --- /dev/null +++ b/margin_logs/step_0000432.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baaf0ca23a6dd0990c6c642666276b22b254f91f3aac749d4b9c597b1b0c480d +size 384 diff --git a/margin_logs/step_0000433.npy b/margin_logs/step_0000433.npy new file mode 100644 index 0000000..eb3c57f --- /dev/null +++ b/margin_logs/step_0000433.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b150f6134971aaf572bf3a187542dff66c5ec1cf8f9264e9cb392c985d2ab7d +size 384 diff --git a/margin_logs/step_0000434.npy b/margin_logs/step_0000434.npy new file mode 100644 index 0000000..cb3bddd --- /dev/null +++ b/margin_logs/step_0000434.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575e967ac23c7fd0bf440a2b438e921d636fec510fdd4deed8d98fd6ad23ba42 +size 384 diff --git a/margin_logs/step_0000435.npy b/margin_logs/step_0000435.npy new file mode 100644 index 0000000..54f92ac --- /dev/null +++ b/margin_logs/step_0000435.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35506e014d0c846a8dcc8d2c39fecabe6c7258f0a5dee6b7016c01cfbcc0b6f +size 384 diff --git a/margin_logs/step_0000436.npy b/margin_logs/step_0000436.npy new file mode 100644 index 0000000..a7d6db0 --- /dev/null +++ b/margin_logs/step_0000436.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87d480e1acf31c99a2cefbcc434a38cd04316bb6d852d3227293cef8faf5a89 +size 384 diff --git a/margin_logs/step_0000437.npy b/margin_logs/step_0000437.npy new file mode 100644 index 0000000..ec0ae39 --- /dev/null +++ b/margin_logs/step_0000437.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fa84b97119ebc3369782aa0b5b0590427eda67b3878aae07c9c7c27c626706 +size 384 diff --git a/margin_logs/step_0000438.npy b/margin_logs/step_0000438.npy new file mode 100644 index 0000000..10eab4f --- /dev/null +++ b/margin_logs/step_0000438.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e6551af27f1bb379f3a0f0be3e56a429e0d8cec856937d5d4923403a589a25 +size 384 diff --git a/margin_logs/step_0000439.npy b/margin_logs/step_0000439.npy new file mode 100644 index 0000000..5d0a010 --- /dev/null +++ b/margin_logs/step_0000439.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d23d05a54146e9e07aaeea3883a94961213c85ecb770f221a218b8d45248200f +size 384 diff --git a/margin_logs/step_0000440.npy b/margin_logs/step_0000440.npy new file mode 100644 index 0000000..f9fc88a --- /dev/null +++ b/margin_logs/step_0000440.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04e432d118bee07fe6ba5c167f4c460d28c38542d21cd9372a2643374460f8d +size 384 diff --git a/margin_logs/step_0000441.npy b/margin_logs/step_0000441.npy new file mode 100644 index 0000000..9264eed --- /dev/null +++ b/margin_logs/step_0000441.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2146cc4c5c3ccc67b05ef1ec90fb013271d6f23dc43971d48e73b2ab6f41b2cc +size 384 diff --git a/margin_logs/step_0000442.npy b/margin_logs/step_0000442.npy new file mode 100644 index 0000000..415b633 --- /dev/null +++ b/margin_logs/step_0000442.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6951c96f8fe657d611a47be8c413994a5747e5d57aa012833e2ff1d5c5de168 +size 384 diff --git a/margin_logs/step_0000443.npy b/margin_logs/step_0000443.npy new file mode 100644 index 0000000..fb90938 --- /dev/null +++ b/margin_logs/step_0000443.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22abab9c05916246d0976ab708bc7a93cfe3ccaa9ce5a5b4e8c72b38b3171ad7 +size 384 diff --git a/margin_logs/step_0000444.npy b/margin_logs/step_0000444.npy new file mode 100644 index 0000000..119fc52 --- /dev/null +++ b/margin_logs/step_0000444.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff942c54d87bd597e7af09e279a5c531e18f8d056d159f13d7fe10c768c041f +size 384 diff --git a/margin_logs/step_0000445.npy b/margin_logs/step_0000445.npy new file mode 100644 index 0000000..48431ec --- /dev/null +++ b/margin_logs/step_0000445.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d70e6211c5304e051d3622b281ca5b077086539637d10048f242514b06b9eba +size 384 diff --git a/margin_logs/step_0000446.npy b/margin_logs/step_0000446.npy new file mode 100644 index 0000000..e94ff2c --- /dev/null +++ b/margin_logs/step_0000446.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757bdb2d108216146b23fe79974add342d8418cd6f4c646bc90172160c57411d +size 384 diff --git a/margin_logs/step_0000447.npy b/margin_logs/step_0000447.npy new file mode 100644 index 0000000..8ad7d3f --- /dev/null +++ b/margin_logs/step_0000447.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9294c62d2d61631355aa735f1b57dfeddfb9d75774321b95484bcb90f165f576 +size 384 diff --git a/margin_logs/step_0000448.npy b/margin_logs/step_0000448.npy new file mode 100644 index 0000000..6398e08 --- /dev/null +++ b/margin_logs/step_0000448.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97896d79cf1026ec1b631b26868f69d1f7e864b76448f7a8ec3f1dd1d99ca2c7 +size 384 diff --git a/margin_logs/step_0000449.npy b/margin_logs/step_0000449.npy new file mode 100644 index 0000000..8db3213 --- /dev/null +++ b/margin_logs/step_0000449.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c52ba469336d9869f85d73f02ee355340fac0a465cecc3d363c8857e926dde6 +size 384 diff --git a/margin_logs/step_0000450.npy b/margin_logs/step_0000450.npy new file mode 100644 index 0000000..64ba95e --- /dev/null +++ b/margin_logs/step_0000450.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b8dc210ef289f0561d77482b5c6a5e7d088b2cbb9f9bbcc981228943888086 +size 384 diff --git a/margin_logs/step_0000451.npy b/margin_logs/step_0000451.npy new file mode 100644 index 0000000..508f3c0 --- /dev/null +++ b/margin_logs/step_0000451.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae7b11cf8709547ac955cebb7cadc02852eda0635cd071272e151a8f91b34d2 +size 384 diff --git a/margin_logs/step_0000452.npy b/margin_logs/step_0000452.npy new file mode 100644 index 0000000..77b0f23 --- /dev/null +++ b/margin_logs/step_0000452.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15021877d12d49c1845c396ed3d7b6ca1d380ee7c3bcbec715f4caeca176e9b3 +size 384 diff --git a/margin_logs/step_0000453.npy b/margin_logs/step_0000453.npy new file mode 100644 index 0000000..ef689dc --- /dev/null +++ b/margin_logs/step_0000453.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230f168b91de8cc709e9b6d8a9d8a7625776a371f2632ec29b407bbc2e2547fb +size 384 diff --git a/margin_logs/step_0000454.npy b/margin_logs/step_0000454.npy new file mode 100644 index 0000000..194fdb8 --- /dev/null +++ b/margin_logs/step_0000454.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11575ce65ef7930b157e8e7f6fd8c6ed0593e7dde03263e12c2b9fc39a875dcf +size 384 diff --git a/margin_logs/step_0000455.npy b/margin_logs/step_0000455.npy new file mode 100644 index 0000000..6678e1a --- /dev/null +++ b/margin_logs/step_0000455.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f96f8ad864ffd574590b7a5eacc8429301b424f0333271487251fec22c1b1f8 +size 384 diff --git a/margin_logs/step_0000456.npy b/margin_logs/step_0000456.npy new file mode 100644 index 0000000..60abc62 --- /dev/null +++ b/margin_logs/step_0000456.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90140eacdbb58e1d1e1e4f11b5dce8def2e525c19906a3576a4aa9a15733ee77 +size 384 diff --git a/margin_logs/step_0000457.npy b/margin_logs/step_0000457.npy new file mode 100644 index 0000000..86dcd6d --- /dev/null +++ b/margin_logs/step_0000457.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a6075dd2223ca37181706ef81e940fa35308e64f929463d53b3dde17ed3f40 +size 384 diff --git a/margin_logs/step_0000458.npy b/margin_logs/step_0000458.npy new file mode 100644 index 0000000..476de7f --- /dev/null +++ b/margin_logs/step_0000458.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5ab2337c1824ee451d1638c124463a3a2fb5ed3cb599877ff1c09bb61703b9 +size 384 diff --git a/margin_logs/step_0000459.npy b/margin_logs/step_0000459.npy new file mode 100644 index 0000000..cfbe38b --- /dev/null +++ b/margin_logs/step_0000459.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a2c924d0ea5a27c677cbb52a9173e5a58a1e9682f17277e02ce6a878a56849 +size 384 diff --git a/margin_logs/step_0000460.npy b/margin_logs/step_0000460.npy new file mode 100644 index 0000000..2682b29 --- /dev/null +++ b/margin_logs/step_0000460.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d5059669db6958decc328091f75ddf5c9683f0ccea91381cf296228e1fc50e +size 384 diff --git a/margin_logs/step_0000461.npy b/margin_logs/step_0000461.npy new file mode 100644 index 0000000..e518175 --- /dev/null +++ b/margin_logs/step_0000461.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb58bbe2703d05cd43f48c859684782f94cbb46056d237db9ba73af1424585a3 +size 384 diff --git a/margin_logs/step_0000462.npy b/margin_logs/step_0000462.npy new file mode 100644 index 0000000..f94b038 --- /dev/null +++ b/margin_logs/step_0000462.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08fa904e96189f26d79d94b17e1ca0dee5d35008c500aca0eac6b702fb03dc5 +size 384 diff --git a/margin_logs/step_0000463.npy b/margin_logs/step_0000463.npy new file mode 100644 index 0000000..7b2d6f1 --- /dev/null +++ b/margin_logs/step_0000463.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf9c54e0244eec5aed653c43e2e545ae502cd7105fd5f2756069661661ab9b5 +size 384 diff --git a/margin_logs/step_0000464.npy b/margin_logs/step_0000464.npy new file mode 100644 index 0000000..d9cb503 --- /dev/null +++ b/margin_logs/step_0000464.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd21472cf2df4d7468203478d68ddf0b29d810769f7afd6c75c0f14ca406c51b +size 384 diff --git a/margin_logs/step_0000465.npy b/margin_logs/step_0000465.npy new file mode 100644 index 0000000..7a0009e --- /dev/null +++ b/margin_logs/step_0000465.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f105ce00b2b3d3c5921d222accd412a2fb259278192550b722d2b032d09447c +size 384 diff --git a/margin_logs/step_0000466.npy b/margin_logs/step_0000466.npy new file mode 100644 index 0000000..f0ab812 --- /dev/null +++ b/margin_logs/step_0000466.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab4bbf17aeb535210554dd73dda9d5afe7e392aa134abdb46f34b289b894b86 +size 384 diff --git a/margin_logs/step_0000467.npy b/margin_logs/step_0000467.npy new file mode 100644 index 0000000..3bf4115 --- /dev/null +++ b/margin_logs/step_0000467.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f940c719a875da2ea1aa2df985773140b5dd9448d9614bc6a57c8818a67b40 +size 384 diff --git a/margin_logs/step_0000468.npy b/margin_logs/step_0000468.npy new file mode 100644 index 0000000..de1c085 --- /dev/null +++ b/margin_logs/step_0000468.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96c00133c744c01ff6fed0e721efd4dd286bd24b3977abbcf9a906c410d5ba7 +size 384 diff --git a/margin_logs/step_0000469.npy b/margin_logs/step_0000469.npy new file mode 100644 index 0000000..8b66bd2 --- /dev/null +++ b/margin_logs/step_0000469.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe61154ab350b8e5f3ac1ff386e183935c39fef31d0f51c27b71fbb0f349da0 +size 384 diff --git a/margin_logs/step_0000470.npy b/margin_logs/step_0000470.npy new file mode 100644 index 0000000..b8738b3 --- /dev/null +++ b/margin_logs/step_0000470.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4fd87b5e8d085d78b0f2dcbd76ad0e5b325ed849fb42bd4960fc5a892b799ea +size 384 diff --git a/margin_logs/step_0000471.npy b/margin_logs/step_0000471.npy new file mode 100644 index 0000000..55fbd59 --- /dev/null +++ b/margin_logs/step_0000471.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b871d3c6c4bb9700505758054412ed358cd9995e88c9eae0b072b7d56d536f76 +size 384 diff --git a/margin_logs/step_0000472.npy b/margin_logs/step_0000472.npy new file mode 100644 index 0000000..8b249bd --- /dev/null +++ b/margin_logs/step_0000472.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2278c01be62eb1ab05a792a5181ca041d29ec08cceed464ae45e8b6f4401264a +size 384 diff --git a/margin_logs/step_0000473.npy b/margin_logs/step_0000473.npy new file mode 100644 index 0000000..e7ccecb --- /dev/null +++ b/margin_logs/step_0000473.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6fd47cdd585169b70df3744d91de36f38b8bac241c6b27afe4faf5d36b6aa41 +size 384 diff --git a/margin_logs/step_0000474.npy b/margin_logs/step_0000474.npy new file mode 100644 index 0000000..ec3645b --- /dev/null +++ b/margin_logs/step_0000474.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7625c092024ca499487afc80506d89c625f7f7be7da98cdf55f59745490de4be +size 384 diff --git a/margin_logs/step_0000475.npy b/margin_logs/step_0000475.npy new file mode 100644 index 0000000..98a3137 --- /dev/null +++ b/margin_logs/step_0000475.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b62ae36606a1fdfd0d3af933c469bb4fb3540b49451c9ee6dcbed81c1555cda +size 384 diff --git a/margin_logs/step_0000476.npy b/margin_logs/step_0000476.npy new file mode 100644 index 0000000..44cacd1 --- /dev/null +++ b/margin_logs/step_0000476.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593f9c8bd07edbf94ce301ed72ee1a5c4b979288b7334b59b30d69bfa28f577c +size 384 diff --git a/margin_logs/step_0000477.npy b/margin_logs/step_0000477.npy new file mode 100644 index 0000000..920f763 --- /dev/null +++ b/margin_logs/step_0000477.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d888682d5179b7b89c91b7d5740f304f7c0c03c616abd34563032d5cf99d09 +size 384 diff --git a/margin_logs/step_0000478.npy b/margin_logs/step_0000478.npy new file mode 100644 index 0000000..a325051 --- /dev/null +++ b/margin_logs/step_0000478.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf1500b4c048de8f85ba0a98f1e10b3aa40413ad93e6e2910c7f2b6dd575945 +size 384 diff --git a/margin_logs/step_0000479.npy b/margin_logs/step_0000479.npy new file mode 100644 index 0000000..8ee1204 --- /dev/null +++ b/margin_logs/step_0000479.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2ec37124d6b11e857915f8980c101ba755873dc862ca4cb227077c15af33379 +size 384 diff --git a/margin_logs/step_0000480.npy b/margin_logs/step_0000480.npy new file mode 100644 index 0000000..d2b41c5 --- /dev/null +++ b/margin_logs/step_0000480.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2566ac021410edc5b12d19bee1411379e066a50ac9f8394c85df616ff53225 +size 384 diff --git a/margin_logs/step_0000481.npy b/margin_logs/step_0000481.npy new file mode 100644 index 0000000..b6c9dcb --- /dev/null +++ b/margin_logs/step_0000481.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929364cc0ada7ccd110c5157b55bf026b0b9eced8e2e2af43a0f3f24351ea146 +size 384 diff --git a/margin_logs/step_0000482.npy b/margin_logs/step_0000482.npy new file mode 100644 index 0000000..a50f7d4 --- /dev/null +++ b/margin_logs/step_0000482.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b14bace64c4d221d26d38816aef0325839b4ea6ec6ade6537c12c3242e57e82 +size 384 diff --git a/margin_logs/step_0000483.npy b/margin_logs/step_0000483.npy new file mode 100644 index 0000000..1db6806 --- /dev/null +++ b/margin_logs/step_0000483.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2bd2ae2a8aaa752eef7da4dd00bcc596fbbadb3c8a33f95f5a3949a27830bd +size 384 diff --git a/margin_logs/step_0000484.npy b/margin_logs/step_0000484.npy new file mode 100644 index 0000000..b9c100d --- /dev/null +++ b/margin_logs/step_0000484.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dca2798804638e94871295eb94c44d98f6760929abeda27043577747ff19728 +size 384 diff --git a/margin_logs/step_0000485.npy b/margin_logs/step_0000485.npy new file mode 100644 index 0000000..31b1d00 --- /dev/null +++ b/margin_logs/step_0000485.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e09b8e5d4bae1fc6516cd660dbe80948c0ae2629fc9b0d51e53f361614d50a1 +size 384 diff --git a/margin_logs/step_0000486.npy b/margin_logs/step_0000486.npy new file mode 100644 index 0000000..865adb3 --- /dev/null +++ b/margin_logs/step_0000486.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7686c4486211062cc2077f11c06c41c63b44f48a87ec80b4b3f03105e61f0aa +size 384 diff --git a/margin_logs/step_0000487.npy b/margin_logs/step_0000487.npy new file mode 100644 index 0000000..2ad49a3 --- /dev/null +++ b/margin_logs/step_0000487.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cffb8a6da65da4ad98b43032717461aa44016a6c5b83df1f7ed6ab89632c870 +size 384 diff --git a/margin_logs/step_0000488.npy b/margin_logs/step_0000488.npy new file mode 100644 index 0000000..6d97be8 --- /dev/null +++ b/margin_logs/step_0000488.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a2afe7fb9439e6916dd7a654a6074b124d7617827b3345011b06070498ee32 +size 384 diff --git a/margin_logs/step_0000489.npy b/margin_logs/step_0000489.npy new file mode 100644 index 0000000..57c42a6 --- /dev/null +++ b/margin_logs/step_0000489.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6cabc2f8c962fdd629b8fcd614ea8912bf10ae3f23fe5f1dd90c73e4be7b904 +size 384 diff --git a/margin_logs/step_0000490.npy b/margin_logs/step_0000490.npy new file mode 100644 index 0000000..50624f3 --- /dev/null +++ b/margin_logs/step_0000490.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9df245e230861a84cbcf65b05954a910554bcee26735d32c283f0eab5cbe11 +size 384 diff --git a/margin_logs/step_0000491.npy b/margin_logs/step_0000491.npy new file mode 100644 index 0000000..f120cb6 --- /dev/null +++ b/margin_logs/step_0000491.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a906f185dac681251a8cc61df2bd8bdc930aeeed68e01f8b39acf7c08dadc067 +size 384 diff --git a/margin_logs/step_0000492.npy b/margin_logs/step_0000492.npy new file mode 100644 index 0000000..d87d15f --- /dev/null +++ b/margin_logs/step_0000492.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69dd80ffefeb0755966597525276b980dfc7a1ea4dd1e755cdec1237c2dd42f +size 384 diff --git a/margin_logs/step_0000493.npy b/margin_logs/step_0000493.npy new file mode 100644 index 0000000..5e6c406 --- /dev/null +++ b/margin_logs/step_0000493.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580f4962fb647258ee2cfdb2ca0079190a61b2dd4cc6258cf8597ea948af1c17 +size 384 diff --git a/margin_logs/step_0000494.npy b/margin_logs/step_0000494.npy new file mode 100644 index 0000000..4f3d176 --- /dev/null +++ b/margin_logs/step_0000494.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4a279cc724e1a826e05e411fc0572b8ad8b3d283a9a76a35d54e1a14b9e7ef +size 384 diff --git a/margin_logs/step_0000495.npy b/margin_logs/step_0000495.npy new file mode 100644 index 0000000..a93c669 --- /dev/null +++ b/margin_logs/step_0000495.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7109f76fc1497c0ca1ee8e07abc6d4bcf6db61f2f29112829d29fc277095b420 +size 384 diff --git a/margin_logs/step_0000496.npy b/margin_logs/step_0000496.npy new file mode 100644 index 0000000..3d747d6 --- /dev/null +++ b/margin_logs/step_0000496.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46095af58838d786abb1b76360c7c054d9c64c987d08d44e5bc965b151d49182 +size 384 diff --git a/margin_logs/step_0000497.npy b/margin_logs/step_0000497.npy new file mode 100644 index 0000000..96e47c2 --- /dev/null +++ b/margin_logs/step_0000497.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a13d43231165ab73d6e1ea6107bded40d5dcd99e3a95b3f70d53cdb412dd1fa +size 384 diff --git a/margin_logs/step_0000498.npy b/margin_logs/step_0000498.npy new file mode 100644 index 0000000..782bbb5 --- /dev/null +++ b/margin_logs/step_0000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d01d15074826c87476579b5c3479c8f2fd1a871b126a7331df358ebee3a2106 +size 384 diff --git a/margin_logs/step_0000499.npy b/margin_logs/step_0000499.npy new file mode 100644 index 0000000..83b3543 --- /dev/null +++ b/margin_logs/step_0000499.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e440a56dcd784115b84b79e93f2dffc34dc523b7f2556c25f6a9652a2c82136d +size 384 diff --git a/margin_logs/step_0000500.npy b/margin_logs/step_0000500.npy new file mode 100644 index 0000000..7b49453 --- /dev/null +++ b/margin_logs/step_0000500.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea76aceb682c60e59bedd3269b6fe1dcb75af9781fc1cf7f9c1c80d667fd57d +size 384 diff --git a/margin_logs/step_0000501.npy b/margin_logs/step_0000501.npy new file mode 100644 index 0000000..3f13692 --- /dev/null +++ b/margin_logs/step_0000501.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38af48c7062f1d29f2db9fd6ff65b2b3be4505fff57fe8ad25120fba460f4be +size 384 diff --git a/margin_logs/step_0000502.npy b/margin_logs/step_0000502.npy new file mode 100644 index 0000000..4ca6232 --- /dev/null +++ b/margin_logs/step_0000502.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e093c6fd28d08e1a0537257b2f0586dfc420bbd0baba630e9c28c1330c859c3a +size 384 diff --git a/margin_logs/step_0000503.npy b/margin_logs/step_0000503.npy new file mode 100644 index 0000000..f404333 --- /dev/null +++ b/margin_logs/step_0000503.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed6cfb6bcfb2913f6abdfe2856136e75b9235807497d6718fe187dc3ac208454 +size 384 diff --git a/margin_logs/step_0000504.npy b/margin_logs/step_0000504.npy new file mode 100644 index 0000000..cbdde94 --- /dev/null +++ b/margin_logs/step_0000504.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba2bcb8af55513b847982bb9c8e483276d760c3b888f66610c0b70d08b0046e +size 384 diff --git a/margin_logs/step_0000505.npy b/margin_logs/step_0000505.npy new file mode 100644 index 0000000..ce0cb64 --- /dev/null +++ b/margin_logs/step_0000505.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ca3d971dd6827f734df9f67f9fd51d8f055b3cfcac6500a3792d42ce7576e0 +size 384 diff --git a/margin_logs/step_0000506.npy b/margin_logs/step_0000506.npy new file mode 100644 index 0000000..b86ecf6 --- /dev/null +++ b/margin_logs/step_0000506.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c274d23630744c948ebac9b15285635d511d47ebaf37401ea8e7780b49ddb98c +size 384 diff --git a/margin_logs/step_0000507.npy b/margin_logs/step_0000507.npy new file mode 100644 index 0000000..f07fecf --- /dev/null +++ b/margin_logs/step_0000507.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594fd15f5257b959de06d48e6d0fe2aa2d51d10f0bc3d9c0337cb097b7616547 +size 384 diff --git a/margin_logs/step_0000508.npy b/margin_logs/step_0000508.npy new file mode 100644 index 0000000..f538504 --- /dev/null +++ b/margin_logs/step_0000508.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6981feb497473261f7a0fc5e4627c8fa05fe76b89829ef3c962dc94f64c8c477 +size 384 diff --git a/margin_logs/step_0000509.npy b/margin_logs/step_0000509.npy new file mode 100644 index 0000000..5b4585e --- /dev/null +++ b/margin_logs/step_0000509.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:141e4b80196487c573d18b6615259c51ecc5027ee25f07258b4530f841e39959 +size 384 diff --git a/margin_logs/step_0000510.npy b/margin_logs/step_0000510.npy new file mode 100644 index 0000000..99d637f --- /dev/null +++ b/margin_logs/step_0000510.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81af2096339c46900e7e7279a9d4ac82920faca3c600994c30dca051c6292ca +size 384 diff --git a/margin_logs/step_0000511.npy b/margin_logs/step_0000511.npy new file mode 100644 index 0000000..f6e58af --- /dev/null +++ b/margin_logs/step_0000511.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a61bf14fdde283b61a40f57fed9bbab043b97b30b7fafe1e712cc7cd766685 +size 384 diff --git a/margin_logs/step_0000512.npy b/margin_logs/step_0000512.npy new file mode 100644 index 0000000..7e24dcc --- /dev/null +++ b/margin_logs/step_0000512.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549b5858a95b0e0decdd94c80954fa0a51798520ee969623f25064e6213ab355 +size 384 diff --git a/margin_logs/step_0000513.npy b/margin_logs/step_0000513.npy new file mode 100644 index 0000000..53358fd --- /dev/null +++ b/margin_logs/step_0000513.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09b1578a179eac5842bdbe675dd07415e9a25fd4b95875b32942dcbcc55a730b +size 384 diff --git a/margin_logs/step_0000514.npy b/margin_logs/step_0000514.npy new file mode 100644 index 0000000..de6d8f8 --- /dev/null +++ b/margin_logs/step_0000514.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdd8be3267fafb51b2b27d9d6a99b42e03c998e6a61feca07ba43d48cdfeef0 +size 384 diff --git a/margin_logs/step_0000515.npy b/margin_logs/step_0000515.npy new file mode 100644 index 0000000..5b278e4 --- /dev/null +++ b/margin_logs/step_0000515.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0b5dd87863c3fb0315e89fde797eab4faa5cc53c895f477ffe28087d522f88 +size 384 diff --git a/margin_logs/step_0000516.npy b/margin_logs/step_0000516.npy new file mode 100644 index 0000000..795858c --- /dev/null +++ b/margin_logs/step_0000516.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a71d0a2e47a865a31de9a5a68b32b28ea0d4e5908ce54ff8856f9fde143540e +size 384 diff --git a/margin_logs/step_0000517.npy b/margin_logs/step_0000517.npy new file mode 100644 index 0000000..b52b044 --- /dev/null +++ b/margin_logs/step_0000517.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19245e4fec1f7586f754308cc98f8f5fef55843e06fef40dca836167167a5f67 +size 384 diff --git a/margin_logs/step_0000518.npy b/margin_logs/step_0000518.npy new file mode 100644 index 0000000..8ed8baa --- /dev/null +++ b/margin_logs/step_0000518.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623ffa3302aedb3126be29289646751886deb0e4ffd175cc8759f45e47727d29 +size 384 diff --git a/margin_logs/step_0000519.npy b/margin_logs/step_0000519.npy new file mode 100644 index 0000000..7890ef3 --- /dev/null +++ b/margin_logs/step_0000519.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033c65a301c5bd31d62b7d1ea50acec7034a227bd543529f3ff89184fe0e3bb7 +size 384 diff --git a/margin_logs/step_0000520.npy b/margin_logs/step_0000520.npy new file mode 100644 index 0000000..2508372 --- /dev/null +++ b/margin_logs/step_0000520.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3cce69dd9ae90ed9bb33c8fc8b5793db7e398386e711b56611eea0fa4cf9efc +size 384 diff --git a/margin_logs/step_0000521.npy b/margin_logs/step_0000521.npy new file mode 100644 index 0000000..219ab3c --- /dev/null +++ b/margin_logs/step_0000521.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:214da5028d209ce649fa3486011747d380754c525cb12fc804365f223fe5119f +size 384 diff --git a/margin_logs/step_0000522.npy b/margin_logs/step_0000522.npy new file mode 100644 index 0000000..8efcf3d --- /dev/null +++ b/margin_logs/step_0000522.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af2a631b5fd134ea7821506d87abeb327d4e6f6f32b4e3f253f5f44cdd2143f +size 384 diff --git a/margin_logs/step_0000523.npy b/margin_logs/step_0000523.npy new file mode 100644 index 0000000..01b13e7 --- /dev/null +++ b/margin_logs/step_0000523.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d7f04dacf55dfc3d5d925a7a23a08db20508470cd836a40a3b9e1b8832b4ed +size 384 diff --git a/margin_logs/step_0000524.npy b/margin_logs/step_0000524.npy new file mode 100644 index 0000000..bb29ed4 --- /dev/null +++ b/margin_logs/step_0000524.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b545b9caf66573b08f27573c564a2212fe6c4c442da85589dedb810888d92a81 +size 384 diff --git a/margin_logs/step_0000525.npy b/margin_logs/step_0000525.npy new file mode 100644 index 0000000..71b5c23 --- /dev/null +++ b/margin_logs/step_0000525.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0173f79f5e4068b98bac900c04353e82e18082fa30c26309c04f2b3751d96e5 +size 384 diff --git a/margin_logs/step_0000526.npy b/margin_logs/step_0000526.npy new file mode 100644 index 0000000..d4583c3 --- /dev/null +++ b/margin_logs/step_0000526.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c20a60d8c798300b7fbaf5677a7e87e0cf2acf7d365215e29316d69e82d8d15 +size 384 diff --git a/margin_logs/step_0000527.npy b/margin_logs/step_0000527.npy new file mode 100644 index 0000000..ddfd167 --- /dev/null +++ b/margin_logs/step_0000527.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de7c6c03d02c400e54b8a32bcd42104184cbca6fb7411435f1263a525e39f60 +size 384 diff --git a/margin_logs/step_0000528.npy b/margin_logs/step_0000528.npy new file mode 100644 index 0000000..27f5027 --- /dev/null +++ b/margin_logs/step_0000528.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11a5a91aff1189e6930a0421dad71a42094386247a17a3032932ebc54e9e78c +size 384 diff --git a/margin_logs/step_0000529.npy b/margin_logs/step_0000529.npy new file mode 100644 index 0000000..0f7f4be --- /dev/null +++ b/margin_logs/step_0000529.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07973276a1f119c3d15631b636ad1ab7de7c82e38aff1bdc3735a0e321abc1a +size 384 diff --git a/margin_logs/step_0000530.npy b/margin_logs/step_0000530.npy new file mode 100644 index 0000000..1e407de --- /dev/null +++ b/margin_logs/step_0000530.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d3ba7fb08b1f43388e14782dcb4a6bcca76e4841407e08542a76be9d0e1a59 +size 384 diff --git a/margin_logs/step_0000531.npy b/margin_logs/step_0000531.npy new file mode 100644 index 0000000..283b1b1 --- /dev/null +++ b/margin_logs/step_0000531.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ede681c6657746affa6b5387561f97de12f6b8d46f4b8fb8d6379c54c166058 +size 384 diff --git a/margin_logs/step_0000532.npy b/margin_logs/step_0000532.npy new file mode 100644 index 0000000..1cf2b04 --- /dev/null +++ b/margin_logs/step_0000532.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc072280673bc3b26a7dd8b1bb9153f0b7eb7872bb53f3d50f4f8597bbac6b4 +size 384 diff --git a/margin_logs/step_0000533.npy b/margin_logs/step_0000533.npy new file mode 100644 index 0000000..15c2d46 --- /dev/null +++ b/margin_logs/step_0000533.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355d41c8bc3f9552356d836cfc79c7afada3f68fa1546fc570e09ed14b85bb1f +size 384 diff --git a/margin_logs/step_0000534.npy b/margin_logs/step_0000534.npy new file mode 100644 index 0000000..691d1d0 --- /dev/null +++ b/margin_logs/step_0000534.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f849de67c185fbd2977e3a7570a8876e6ac052422fd89010a0708d85ee2ec66a +size 384 diff --git a/margin_logs/step_0000535.npy b/margin_logs/step_0000535.npy new file mode 100644 index 0000000..55b5ded --- /dev/null +++ b/margin_logs/step_0000535.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c68bd75ee05e817aa935d6ccff0af4b399dc4d9839bf53738d5cb4af1ade4e +size 384 diff --git a/margin_logs/step_0000536.npy b/margin_logs/step_0000536.npy new file mode 100644 index 0000000..71456b1 --- /dev/null +++ b/margin_logs/step_0000536.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807b312ca74636de1ba0f42d76d778aa5075568816e128c3c2b3285ddca1b9fa +size 384 diff --git a/margin_logs/step_0000537.npy b/margin_logs/step_0000537.npy new file mode 100644 index 0000000..9426eac --- /dev/null +++ b/margin_logs/step_0000537.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b10d499b11921fe060be5b6adfc839eeb028d86316ab4b8cc5a8fe795246640 +size 384 diff --git a/margin_logs/step_0000538.npy b/margin_logs/step_0000538.npy new file mode 100644 index 0000000..23e6445 --- /dev/null +++ b/margin_logs/step_0000538.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99132cc9301dc7629cf25538c2f67aa44f767aa2209f8ee83961250ba5d4fec +size 384 diff --git a/margin_logs/step_0000539.npy b/margin_logs/step_0000539.npy new file mode 100644 index 0000000..b80c8be --- /dev/null +++ b/margin_logs/step_0000539.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9faa2b156328ca6239466a2ea4db94fd0ac05c2d251592a6839c5eaf91b76e19 +size 384 diff --git a/margin_logs/step_0000540.npy b/margin_logs/step_0000540.npy new file mode 100644 index 0000000..4f4e3d9 --- /dev/null +++ b/margin_logs/step_0000540.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a130e6af2dc044fb69d82c43a563971fa7f0fc79e19afa52931eaac60fc3406c +size 384 diff --git a/margin_logs/step_0000541.npy b/margin_logs/step_0000541.npy new file mode 100644 index 0000000..4659c04 --- /dev/null +++ b/margin_logs/step_0000541.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a743c103e433eb55fc9812adf318d9c227cd723f65e258879d892284027407aa +size 384 diff --git a/margin_logs/step_0000542.npy b/margin_logs/step_0000542.npy new file mode 100644 index 0000000..f6d98b7 --- /dev/null +++ b/margin_logs/step_0000542.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb075294886dc239e9763b3d93a2991dbba17fd15ce5540fb370f4b21a55c6e +size 384 diff --git a/margin_logs/step_0000543.npy b/margin_logs/step_0000543.npy new file mode 100644 index 0000000..7b804ad --- /dev/null +++ b/margin_logs/step_0000543.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c9d5a2b6160db777755400e80b7efc769dd678378a0a3e6abecc85ce5fdee5 +size 384 diff --git a/margin_logs/step_0000544.npy b/margin_logs/step_0000544.npy new file mode 100644 index 0000000..6ffac34 --- /dev/null +++ b/margin_logs/step_0000544.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3101478c57ab50dc7e9ba2e89a48548712e621e8163248194b5273165cce5a44 +size 384 diff --git a/margin_logs/step_0000545.npy b/margin_logs/step_0000545.npy new file mode 100644 index 0000000..26d038a --- /dev/null +++ b/margin_logs/step_0000545.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a231a654c51501cb1b1dcf2033b80f1e867f594aab5c6144aec574995704ff4 +size 384 diff --git a/margin_logs/step_0000546.npy b/margin_logs/step_0000546.npy new file mode 100644 index 0000000..fd1ae28 --- /dev/null +++ b/margin_logs/step_0000546.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bbf95ae1a21deccb5d49e92fd9f878c74ca9800c800d56220800f59ea980e63 +size 384 diff --git a/margin_logs/step_0000547.npy b/margin_logs/step_0000547.npy new file mode 100644 index 0000000..9817049 --- /dev/null +++ b/margin_logs/step_0000547.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046eb730815ed8451a9b440a1d036d6e93d564d8267d9ce670057f89633b29a5 +size 384 diff --git a/margin_logs/step_0000548.npy b/margin_logs/step_0000548.npy new file mode 100644 index 0000000..7d7fd68 --- /dev/null +++ b/margin_logs/step_0000548.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197292289142357b2832c0f05b719ca0232b78fecb84c4f7a4ba79ab2ff3041f +size 384 diff --git a/margin_logs/step_0000549.npy b/margin_logs/step_0000549.npy new file mode 100644 index 0000000..2cbe1d1 --- /dev/null +++ b/margin_logs/step_0000549.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836a9d8dffac7947418f8039cb50af2c4c22703c1412f0fb57d20c1bc02a381b +size 384 diff --git a/margin_logs/step_0000550.npy b/margin_logs/step_0000550.npy new file mode 100644 index 0000000..5a7477d --- /dev/null +++ b/margin_logs/step_0000550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850473627ddc7348014d4876dc4bb43badeeb498325dd2fcdf0b1a3f0700ca77 +size 384 diff --git a/margin_logs/step_0000551.npy b/margin_logs/step_0000551.npy new file mode 100644 index 0000000..d0d741f --- /dev/null +++ b/margin_logs/step_0000551.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc990ecb28aa6985ac12b09a286a361c8f713ba352a0b363c30fcdea79e5fbb5 +size 384 diff --git a/margin_logs/step_0000552.npy b/margin_logs/step_0000552.npy new file mode 100644 index 0000000..621d1e3 --- /dev/null +++ b/margin_logs/step_0000552.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fff6d73bf4a7d44467f797e06bd3e0c789aafe552fd7e33f593feca0700f67 +size 384 diff --git a/margin_logs/step_0000553.npy b/margin_logs/step_0000553.npy new file mode 100644 index 0000000..b68b8a8 --- /dev/null +++ b/margin_logs/step_0000553.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42b0a05e0cfe6dbb0cb81f7cb4a21fc8695768f49740f9056c750a78dac867da +size 384 diff --git a/margin_logs/step_0000554.npy b/margin_logs/step_0000554.npy new file mode 100644 index 0000000..52d6387 --- /dev/null +++ b/margin_logs/step_0000554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4868e37a3785626951d7a2c8b8a5e5ab27fad6780c6a3d1dc3776f9ce76020f0 +size 384 diff --git a/margin_logs/step_0000555.npy b/margin_logs/step_0000555.npy new file mode 100644 index 0000000..2ca1ef0 --- /dev/null +++ b/margin_logs/step_0000555.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55643361375df954f9a16ac44117455624f5f369497a5382796493f495bced2f +size 384 diff --git a/margin_logs/step_0000556.npy b/margin_logs/step_0000556.npy new file mode 100644 index 0000000..24f478b --- /dev/null +++ b/margin_logs/step_0000556.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20e2c83c359f993160ebe22b817de9ac4f112d4f083e44d64fb2af1edd2e8c8 +size 384 diff --git a/margin_logs/step_0000557.npy b/margin_logs/step_0000557.npy new file mode 100644 index 0000000..dbad4f3 --- /dev/null +++ b/margin_logs/step_0000557.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d66edf742e30922d3791bcb59ccad4bd1543c1f761c5b76b8aa66ea37cc6079 +size 384 diff --git a/margin_logs/step_0000558.npy b/margin_logs/step_0000558.npy new file mode 100644 index 0000000..8467475 --- /dev/null +++ b/margin_logs/step_0000558.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a56b7b3f0a308bf06c82e2beb0953737870e9f135fd658ae855d1d579da0a7 +size 384 diff --git a/margin_logs/step_0000559.npy b/margin_logs/step_0000559.npy new file mode 100644 index 0000000..fe3eea0 --- /dev/null +++ b/margin_logs/step_0000559.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bfab5328e6b45a2217e29351fc2346dc1b1181558c48bb042288462a6aae6a +size 384 diff --git a/margin_logs/step_0000560.npy b/margin_logs/step_0000560.npy new file mode 100644 index 0000000..8f8edbb --- /dev/null +++ b/margin_logs/step_0000560.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5447fecbdb90c192419762935f3902f922f377e77a12b3f9f405945f843cfa +size 384 diff --git a/margin_logs/step_0000561.npy b/margin_logs/step_0000561.npy new file mode 100644 index 0000000..ec67b8d --- /dev/null +++ b/margin_logs/step_0000561.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5e1fb05a3e369cd0c2e03fd66d4f43d1eb14fc449640bfaf7e82be2f72d86d +size 384 diff --git a/margin_logs/step_0000562.npy b/margin_logs/step_0000562.npy new file mode 100644 index 0000000..040ba57 --- /dev/null +++ b/margin_logs/step_0000562.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3130f7aa864e9948c3516f415519639a5dfbe62ded0eea47be4ac8f323b1c318 +size 384 diff --git a/margin_logs/step_0000563.npy b/margin_logs/step_0000563.npy new file mode 100644 index 0000000..76931e5 --- /dev/null +++ b/margin_logs/step_0000563.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49c07b1d436a9b9a76489729415b3552c4c6279a881b603648317544c9bab45 +size 384 diff --git a/margin_logs/step_0000564.npy b/margin_logs/step_0000564.npy new file mode 100644 index 0000000..2174fb9 --- /dev/null +++ b/margin_logs/step_0000564.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7eae6229f866fd0bc1a173c8a12b9d4c15de01103e05a239610b7f685844c8f +size 384 diff --git a/margin_logs/step_0000565.npy b/margin_logs/step_0000565.npy new file mode 100644 index 0000000..50c5656 --- /dev/null +++ b/margin_logs/step_0000565.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa682350ce3b56793ddad1bb940d9058d986f4e59427abdb4c953f81d3b382c1 +size 384 diff --git a/margin_logs/step_0000566.npy b/margin_logs/step_0000566.npy new file mode 100644 index 0000000..6d243e5 --- /dev/null +++ b/margin_logs/step_0000566.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7aad38769c3add5bbbad011419bfc0a38bde6405e03376fa6c839591000a2d2 +size 384 diff --git a/margin_logs/step_0000567.npy b/margin_logs/step_0000567.npy new file mode 100644 index 0000000..8c54a4e --- /dev/null +++ b/margin_logs/step_0000567.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9115e7d444b3d01f28f1653e618554512336c14d5e8189bc110fbe25b3d022 +size 384 diff --git a/margin_logs/step_0000568.npy b/margin_logs/step_0000568.npy new file mode 100644 index 0000000..56c2905 --- /dev/null +++ b/margin_logs/step_0000568.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02285fd948ba1c85b038e56d55091312f3f5e4a55478e0777ddd7d85f3badde1 +size 384 diff --git a/margin_logs/step_0000569.npy b/margin_logs/step_0000569.npy new file mode 100644 index 0000000..d9be0c2 --- /dev/null +++ b/margin_logs/step_0000569.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0481e32de86ad67c89b953eb6e6b2a13965f249b72e29e56f0e2c5ed4c38b419 +size 384 diff --git a/margin_logs/step_0000570.npy b/margin_logs/step_0000570.npy new file mode 100644 index 0000000..932a5c0 --- /dev/null +++ b/margin_logs/step_0000570.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5814117e52cf0cfcdf2ad60ec3f833714f933808f08dc44189ebdcfdc7eae097 +size 384 diff --git a/margin_logs/step_0000571.npy b/margin_logs/step_0000571.npy new file mode 100644 index 0000000..5c57799 --- /dev/null +++ b/margin_logs/step_0000571.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad800c73d1fc6f08334e084ff0ea29d150a088cf4718f2b038ced74e48db74d6 +size 384 diff --git a/margin_logs/step_0000572.npy b/margin_logs/step_0000572.npy new file mode 100644 index 0000000..92b1193 --- /dev/null +++ b/margin_logs/step_0000572.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6ef3be35691f36b4c4a905a353cbb153f7cb266b5cad7b8a27cae8f541dd2f +size 384 diff --git a/margin_logs/step_0000573.npy b/margin_logs/step_0000573.npy new file mode 100644 index 0000000..e600916 --- /dev/null +++ b/margin_logs/step_0000573.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cadaea0718a49d69966d564102818d3f74c7f7fba442b628e8e5aad9555645d +size 384 diff --git a/margin_logs/step_0000574.npy b/margin_logs/step_0000574.npy new file mode 100644 index 0000000..08a965c --- /dev/null +++ b/margin_logs/step_0000574.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7f6fabd451148fd93b0615760543d5a99cfcfea8a1b908d321ad60c44e90a8 +size 384 diff --git a/margin_logs/step_0000575.npy b/margin_logs/step_0000575.npy new file mode 100644 index 0000000..a32ea68 --- /dev/null +++ b/margin_logs/step_0000575.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc96ae3741b63644089e3494143de31d4923ff815758aa975bb5fd385dd35938 +size 384 diff --git a/margin_logs/step_0000576.npy b/margin_logs/step_0000576.npy new file mode 100644 index 0000000..13e1927 --- /dev/null +++ b/margin_logs/step_0000576.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb09ee8f6fe84fd9a76c074c99bebbcb100ef89a314458ebe50f9868bba01de +size 384 diff --git a/margin_logs/step_0000577.npy b/margin_logs/step_0000577.npy new file mode 100644 index 0000000..9c67057 --- /dev/null +++ b/margin_logs/step_0000577.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755e8f64529f511f9ed035520e8f2939479ef483b539a6b03135ac960a276c2e +size 384 diff --git a/margin_logs/step_0000578.npy b/margin_logs/step_0000578.npy new file mode 100644 index 0000000..a2aab5e --- /dev/null +++ b/margin_logs/step_0000578.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43333da6f8f2d7d19fe8cc7676a20df12f7b9fd706a960c1abff5f8f464ba5f +size 384 diff --git a/margin_logs/step_0000579.npy b/margin_logs/step_0000579.npy new file mode 100644 index 0000000..e71d274 --- /dev/null +++ b/margin_logs/step_0000579.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a330040b6247332682b44ebe0227c38442ee98732eeeae6d22bee4c165eb6d0 +size 384 diff --git a/margin_logs/step_0000580.npy b/margin_logs/step_0000580.npy new file mode 100644 index 0000000..eef1dce --- /dev/null +++ b/margin_logs/step_0000580.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d100247560ba08d908e4a55194a1fbb50ff1fcb5f0feed7c141fbb91503c7cd8 +size 384 diff --git a/margin_logs/step_0000581.npy b/margin_logs/step_0000581.npy new file mode 100644 index 0000000..e02daa1 --- /dev/null +++ b/margin_logs/step_0000581.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc929fbcde1a05e9841ced79ae92eb8ced5292418784d67fdd139a64295d71e +size 384 diff --git a/margin_logs/step_0000582.npy b/margin_logs/step_0000582.npy new file mode 100644 index 0000000..5a33091 --- /dev/null +++ b/margin_logs/step_0000582.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f38e083e166489e0811dc176b77c61c364d3f6cc3dcddb762812e442c540c5 +size 384 diff --git a/margin_logs/step_0000583.npy b/margin_logs/step_0000583.npy new file mode 100644 index 0000000..e1aec6e --- /dev/null +++ b/margin_logs/step_0000583.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64329fcc8efa056346319169e1f9341caecab1daf66f2ce41c2d06175b46e153 +size 384 diff --git a/margin_logs/step_0000584.npy b/margin_logs/step_0000584.npy new file mode 100644 index 0000000..a5bd12c --- /dev/null +++ b/margin_logs/step_0000584.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d86b975ec3ea78a5cae0799f1425faed5bd7fb3ee8d65c5348901894e59ca3 +size 384 diff --git a/margin_logs/step_0000585.npy b/margin_logs/step_0000585.npy new file mode 100644 index 0000000..8ea7677 --- /dev/null +++ b/margin_logs/step_0000585.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d4c44a88d899d76d3d16c0dd3a458e87357de1973e923d24bc25dd93496a58 +size 384 diff --git a/margin_logs/step_0000586.npy b/margin_logs/step_0000586.npy new file mode 100644 index 0000000..7f1a1f1 --- /dev/null +++ b/margin_logs/step_0000586.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3a1dba35e7758f2c0087e32a47d5f31c1a27ebb4eae26c10d54445ec33cd39 +size 384 diff --git a/margin_logs/step_0000587.npy b/margin_logs/step_0000587.npy new file mode 100644 index 0000000..9d78788 --- /dev/null +++ b/margin_logs/step_0000587.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6cc3935556307410aebdce8aa783ffdfd0fc61c9d41e002dc99ed896628d8bf +size 384 diff --git a/margin_logs/step_0000588.npy b/margin_logs/step_0000588.npy new file mode 100644 index 0000000..217a459 --- /dev/null +++ b/margin_logs/step_0000588.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d2461702f935039681dc03334df7ebefb531b3276cdbc309e0656d5eebefc2 +size 384 diff --git a/margin_logs/step_0000589.npy b/margin_logs/step_0000589.npy new file mode 100644 index 0000000..dda74df --- /dev/null +++ b/margin_logs/step_0000589.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c011e031e4f44e22240833af8a0190a3edff7a46fa894e518af72e6f396c6d +size 384 diff --git a/margin_logs/step_0000590.npy b/margin_logs/step_0000590.npy new file mode 100644 index 0000000..feb446b --- /dev/null +++ b/margin_logs/step_0000590.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb5e09136f6fa4c2b1d3f08a75a392c20c0f51a449fe72e53114d6987fce57f +size 384 diff --git a/margin_logs/step_0000591.npy b/margin_logs/step_0000591.npy new file mode 100644 index 0000000..8379e1e --- /dev/null +++ b/margin_logs/step_0000591.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9198282d8592dbc2c6cc9e25ca48330614825805fd70a06c2636cd74af41e6 +size 384 diff --git a/margin_logs/step_0000592.npy b/margin_logs/step_0000592.npy new file mode 100644 index 0000000..75d0599 --- /dev/null +++ b/margin_logs/step_0000592.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ad2fcaae4e41a5c8d4a51e93de6ee6bfb92853e9fa64f99867561123f73aba +size 384 diff --git a/margin_logs/step_0000593.npy b/margin_logs/step_0000593.npy new file mode 100644 index 0000000..8f4a154 --- /dev/null +++ b/margin_logs/step_0000593.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03e7448b561da79bfed21895c4b1448c48bb4e2efeb6988ff70b9eef47437a3c +size 384 diff --git a/margin_logs/step_0000594.npy b/margin_logs/step_0000594.npy new file mode 100644 index 0000000..ef7265a --- /dev/null +++ b/margin_logs/step_0000594.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b784bd2a40dbe493584ac8ca754c49441067e7ed7f2c15cfbcb0c9baaeb47383 +size 384 diff --git a/margin_logs/step_0000595.npy b/margin_logs/step_0000595.npy new file mode 100644 index 0000000..7dc4379 --- /dev/null +++ b/margin_logs/step_0000595.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50c2a3ceeb2e54e83b08641bfc5a1237f621fe9583fa53a0a6a432f8639bb53e +size 384 diff --git a/margin_logs/step_0000596.npy b/margin_logs/step_0000596.npy new file mode 100644 index 0000000..2ef7891 --- /dev/null +++ b/margin_logs/step_0000596.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0b512dc38efe82d9bf6782e84320c1f8cecbfcbac2a1004ec3797fbc0dc1b1 +size 384 diff --git a/margin_logs/step_0000597.npy b/margin_logs/step_0000597.npy new file mode 100644 index 0000000..4d49ce0 --- /dev/null +++ b/margin_logs/step_0000597.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c14366b0fd9597e9870ee4a11622405dbb8912b61dd987fb41cd9317d23eac +size 384 diff --git a/margin_logs/step_0000598.npy b/margin_logs/step_0000598.npy new file mode 100644 index 0000000..e734467 --- /dev/null +++ b/margin_logs/step_0000598.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e9a0e5dc475f64d836ac742420f8d5b3d72b541dc6cab5d1e2a43ac4b37fd1 +size 384 diff --git a/margin_logs/step_0000599.npy b/margin_logs/step_0000599.npy new file mode 100644 index 0000000..8e5fbcf --- /dev/null +++ b/margin_logs/step_0000599.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4898df4d85994cf7410064cb76de54fd12f1c3395860c4556e2e6f673acef5a2 +size 384 diff --git a/margin_logs/step_0000600.npy b/margin_logs/step_0000600.npy new file mode 100644 index 0000000..810c551 --- /dev/null +++ b/margin_logs/step_0000600.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc2f68a3ee00bdfdd2164573da28ab3cae3389d46cf0af944c16e1f42cf38ef +size 384 diff --git a/margin_logs/step_0000601.npy b/margin_logs/step_0000601.npy new file mode 100644 index 0000000..e6331e7 --- /dev/null +++ b/margin_logs/step_0000601.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1fc250aa796a4b2b60bb5a768b65b89b703f1c03e26be15191c1fc8d580ee0 +size 384 diff --git a/margin_logs/step_0000602.npy b/margin_logs/step_0000602.npy new file mode 100644 index 0000000..182dbc4 --- /dev/null +++ b/margin_logs/step_0000602.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9084093ca9a72915c8663fe0888222dff9a33cb8e9a66efb8420f5828f54d526 +size 384 diff --git a/margin_logs/step_0000603.npy b/margin_logs/step_0000603.npy new file mode 100644 index 0000000..197b376 --- /dev/null +++ b/margin_logs/step_0000603.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afea4f015cae564be71dcb75a9ec342e432af4ee6baf11562c56d52957bb7d55 +size 384 diff --git a/margin_logs/step_0000604.npy b/margin_logs/step_0000604.npy new file mode 100644 index 0000000..46b0403 --- /dev/null +++ b/margin_logs/step_0000604.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88822c04d2084161bf443a4ebf434bbeef0469a1201359490a44d26f6ee991bf +size 384 diff --git a/margin_logs/step_0000605.npy b/margin_logs/step_0000605.npy new file mode 100644 index 0000000..637ffc2 --- /dev/null +++ b/margin_logs/step_0000605.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40f245bcc5f11999bd496de36ebe6b75c1fce560cf8cfa263dbd351927c507a +size 384 diff --git a/margin_logs/step_0000606.npy b/margin_logs/step_0000606.npy new file mode 100644 index 0000000..8a6c2fd --- /dev/null +++ b/margin_logs/step_0000606.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c558d5baeb3b32d2a15aae01a0c56a6266225005908b622a032cc550dae1f8ce +size 384 diff --git a/margin_logs/step_0000607.npy b/margin_logs/step_0000607.npy new file mode 100644 index 0000000..f7f1bfc --- /dev/null +++ b/margin_logs/step_0000607.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a7179bfd04a9e34850c9e3c1f65848dadf4ba8a2fada239c9f9e0a5ee3490d +size 384 diff --git a/margin_logs/step_0000608.npy b/margin_logs/step_0000608.npy new file mode 100644 index 0000000..4624fc2 --- /dev/null +++ b/margin_logs/step_0000608.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fd936618f6cf59fd8afe303a3de4588e1b71360fdc9e7b89aa290198c90149 +size 384 diff --git a/margin_logs/step_0000609.npy b/margin_logs/step_0000609.npy new file mode 100644 index 0000000..4c8739e --- /dev/null +++ b/margin_logs/step_0000609.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da7126796d550fd814c30185c096f608507906c5e3671adfb5315d54a7fb51e +size 384 diff --git a/margin_logs/step_0000610.npy b/margin_logs/step_0000610.npy new file mode 100644 index 0000000..0517fb5 --- /dev/null +++ b/margin_logs/step_0000610.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef56ee87233ddec3fc99ce0b934f3cde08aba69702e0c62a420da57c8caa026 +size 384 diff --git a/margin_logs/step_0000611.npy b/margin_logs/step_0000611.npy new file mode 100644 index 0000000..8e1e336 --- /dev/null +++ b/margin_logs/step_0000611.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae051f98af95e356252ae2f0abcac509162d6ea33bf1b07dd518f626cf2d211d +size 384 diff --git a/margin_logs/step_0000612.npy b/margin_logs/step_0000612.npy new file mode 100644 index 0000000..046aee4 --- /dev/null +++ b/margin_logs/step_0000612.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2878fdeff64568581574eb28546d0a086b8199e87a38f8d95535ef99db62c394 +size 384 diff --git a/margin_logs/step_0000613.npy b/margin_logs/step_0000613.npy new file mode 100644 index 0000000..14bb275 --- /dev/null +++ b/margin_logs/step_0000613.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdde3497f4cebd963e272a70f375e6672d79685875f1dcf288a48bcceeff992 +size 384 diff --git a/margin_logs/step_0000614.npy b/margin_logs/step_0000614.npy new file mode 100644 index 0000000..ca78606 --- /dev/null +++ b/margin_logs/step_0000614.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41a10de958f67eac0a41fbc201cb82403aa18834a7134648f70be967feaa3a8 +size 384 diff --git a/margin_logs/step_0000615.npy b/margin_logs/step_0000615.npy new file mode 100644 index 0000000..2aca7cd --- /dev/null +++ b/margin_logs/step_0000615.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e0b20653dc110a03f22965ba236224df8dc76f44a0d13d1524df142e8bc578 +size 384 diff --git a/margin_logs/step_0000616.npy b/margin_logs/step_0000616.npy new file mode 100644 index 0000000..dc78de8 --- /dev/null +++ b/margin_logs/step_0000616.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10225b44d1df52345fcaf6e282b91aac067641a37d5604f516219b9306c0811b +size 384 diff --git a/margin_logs/step_0000617.npy b/margin_logs/step_0000617.npy new file mode 100644 index 0000000..8d9409f --- /dev/null +++ b/margin_logs/step_0000617.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7382b0ab2886f19ec7ef8a3d6e6817c4a98c47be83343f1810236cbfbab4374a +size 384 diff --git a/margin_logs/step_0000618.npy b/margin_logs/step_0000618.npy new file mode 100644 index 0000000..ca8a5a4 --- /dev/null +++ b/margin_logs/step_0000618.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea4b46633bb762222f720accd958e0cd5103c0dcd29c69b73fbf171995b735b +size 384 diff --git a/margin_logs/step_0000619.npy b/margin_logs/step_0000619.npy new file mode 100644 index 0000000..a4420fd --- /dev/null +++ b/margin_logs/step_0000619.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e153f8a633e4a0a37de91754b0420df5c10ef4207b3e476f0e193af5ace0d78 +size 384 diff --git a/margin_logs/step_0000620.npy b/margin_logs/step_0000620.npy new file mode 100644 index 0000000..1e6ec0d --- /dev/null +++ b/margin_logs/step_0000620.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258fe1cd434131584e3b34882b9b8a3a3c0d73a846d2fca4ecae7278ab7e8367 +size 384 diff --git a/margin_logs/step_0000621.npy b/margin_logs/step_0000621.npy new file mode 100644 index 0000000..43e235c --- /dev/null +++ b/margin_logs/step_0000621.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad766649568a4efd66642c4a8723590fb08dfc57ab1925585c26b190aaf669b2 +size 384 diff --git a/margin_logs/step_0000622.npy b/margin_logs/step_0000622.npy new file mode 100644 index 0000000..415b065 --- /dev/null +++ b/margin_logs/step_0000622.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a414943544f14efbeabd377a1733e5261b83bb665324347546b068faebd4a0d +size 384 diff --git a/margin_logs/step_0000623.npy b/margin_logs/step_0000623.npy new file mode 100644 index 0000000..2f9ab42 --- /dev/null +++ b/margin_logs/step_0000623.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a10902017735d48a9cfb7232b68012f319859dc7e418b9890807770b002daa5 +size 384 diff --git a/margin_logs/step_0000624.npy b/margin_logs/step_0000624.npy new file mode 100644 index 0000000..9437008 --- /dev/null +++ b/margin_logs/step_0000624.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef005ed8b30875e68607e41204095e3ff74bbe3f044361a40041826589b9b7a4 +size 384 diff --git a/margin_logs/step_0000625.npy b/margin_logs/step_0000625.npy new file mode 100644 index 0000000..ad4e0b4 --- /dev/null +++ b/margin_logs/step_0000625.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66e32506f4276dd383cfda6ea4c67cef2dcb7882dd0a3dafff0383c329de57f +size 384 diff --git a/margin_logs/step_0000626.npy b/margin_logs/step_0000626.npy new file mode 100644 index 0000000..65fe695 --- /dev/null +++ b/margin_logs/step_0000626.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c7bae6d57351a95b754bc80d08f34bfe7ec1071dc6d05c15fd10db50e367bc4 +size 384 diff --git a/margin_logs/step_0000627.npy b/margin_logs/step_0000627.npy new file mode 100644 index 0000000..7fbbe10 --- /dev/null +++ b/margin_logs/step_0000627.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b214d8c01323a5b982d3fe49edaa7258443b2373419e7e0d1cd90e32d8bc6f01 +size 384 diff --git a/margin_logs/step_0000628.npy b/margin_logs/step_0000628.npy new file mode 100644 index 0000000..eabc422 --- /dev/null +++ b/margin_logs/step_0000628.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c538f916b6f92083c59fce0fcf2b5e2601e22bba1a28ddd205f741bb7b34c81f +size 384 diff --git a/margin_logs/step_0000629.npy b/margin_logs/step_0000629.npy new file mode 100644 index 0000000..cc6bd9e --- /dev/null +++ b/margin_logs/step_0000629.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:affef0b18c068a67897e8c8bcc7247b69d01d4acc0cd2443417328e5bd3a7056 +size 384 diff --git a/margin_logs/step_0000630.npy b/margin_logs/step_0000630.npy new file mode 100644 index 0000000..61cac12 --- /dev/null +++ b/margin_logs/step_0000630.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82de569638b70403ec9077756cdffc87185b35aba318fb83209cba304b9939cd +size 384 diff --git a/margin_logs/step_0000631.npy b/margin_logs/step_0000631.npy new file mode 100644 index 0000000..7825a48 --- /dev/null +++ b/margin_logs/step_0000631.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff8eeb1be94e2abd2ffbbabd68de97c95cc5c8c6ad9faac38cb4d9878a05fda +size 384 diff --git a/margin_logs/step_0000632.npy b/margin_logs/step_0000632.npy new file mode 100644 index 0000000..144f578 --- /dev/null +++ b/margin_logs/step_0000632.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8ef654d0e9ce346eb60fea8de9111fcc4e4cc1ea50171a90d0aab37b7e1fd6 +size 384 diff --git a/margin_logs/step_0000633.npy b/margin_logs/step_0000633.npy new file mode 100644 index 0000000..d45a22e --- /dev/null +++ b/margin_logs/step_0000633.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4beb5980f43346ac27b498776b14f19e005c9ef17c99d9f060e037776b2135 +size 384 diff --git a/margin_logs/step_0000634.npy b/margin_logs/step_0000634.npy new file mode 100644 index 0000000..110916a --- /dev/null +++ b/margin_logs/step_0000634.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7224732a7e40dddf2a772636402d1841ceb77ff7dba7ee64855e1ef1c7169f5 +size 384 diff --git a/margin_logs/step_0000635.npy b/margin_logs/step_0000635.npy new file mode 100644 index 0000000..82a736d --- /dev/null +++ b/margin_logs/step_0000635.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0866437e93ab8cb26e995093b81802c53a598ff3907fa2a26fd0985e2d1463 +size 384 diff --git a/margin_logs/step_0000636.npy b/margin_logs/step_0000636.npy new file mode 100644 index 0000000..e4f7926 --- /dev/null +++ b/margin_logs/step_0000636.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58290ee0081ee3f6a4ab838d6d1c61a7204793d8acb69f1ebe044b568f1b0565 +size 384 diff --git a/margin_logs/step_0000637.npy b/margin_logs/step_0000637.npy new file mode 100644 index 0000000..1a9b2a4 --- /dev/null +++ b/margin_logs/step_0000637.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3565895f2cea00026664197448162d2cc98aadfefa8d0393fe182b8fa6b85c +size 384 diff --git a/margin_logs/step_0000638.npy b/margin_logs/step_0000638.npy new file mode 100644 index 0000000..9fae433 --- /dev/null +++ b/margin_logs/step_0000638.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b014cf6d123c6881743704f37ba60c38ef96c7af38135937f3d8f4771b926f1 +size 384 diff --git a/margin_logs/step_0000639.npy b/margin_logs/step_0000639.npy new file mode 100644 index 0000000..ff87543 --- /dev/null +++ b/margin_logs/step_0000639.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394b6f4b4e60e59351675fe24351e9ed67ce79b40e9d5b42b5b29cb00ba2d923 +size 384 diff --git a/margin_logs/step_0000640.npy b/margin_logs/step_0000640.npy new file mode 100644 index 0000000..118e355 --- /dev/null +++ b/margin_logs/step_0000640.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce76c4167cb0be7866d33ba0fda866a3ee3b96574b962b5e27af4121007bf77d +size 384 diff --git a/margin_logs/step_0000641.npy b/margin_logs/step_0000641.npy new file mode 100644 index 0000000..1727a5d --- /dev/null +++ b/margin_logs/step_0000641.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:190e7aa3b87adb26736963f86a91e2ef43522cb11840baa7eed4d9d1cf7d81ae +size 384 diff --git a/margin_logs/step_0000642.npy b/margin_logs/step_0000642.npy new file mode 100644 index 0000000..a76862f --- /dev/null +++ b/margin_logs/step_0000642.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92e88738c29743caffe81c947df48ff1fa5fdacad5c7ce4a6f77f46f932c1c2 +size 384 diff --git a/margin_logs/step_0000643.npy b/margin_logs/step_0000643.npy new file mode 100644 index 0000000..cb92191 --- /dev/null +++ b/margin_logs/step_0000643.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd4c702d33ac2af23338d0dce3ca18a5fc06b6ec6ec1440b24c3f3c7a92d7c6 +size 384 diff --git a/margin_logs/step_0000644.npy b/margin_logs/step_0000644.npy new file mode 100644 index 0000000..d762abc --- /dev/null +++ b/margin_logs/step_0000644.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a43088a9382c774e8f5353d1ada040d9caac95c6dd6f6f6d89537b8a29db39c +size 384 diff --git a/margin_logs/step_0000645.npy b/margin_logs/step_0000645.npy new file mode 100644 index 0000000..a09bacc --- /dev/null +++ b/margin_logs/step_0000645.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942aaceaf1f98209a42f3fcfb0fa5eb61025611f1f4d5bd959e6fd5eeb3a5af8 +size 384 diff --git a/margin_logs/step_0000646.npy b/margin_logs/step_0000646.npy new file mode 100644 index 0000000..cd36c20 --- /dev/null +++ b/margin_logs/step_0000646.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d172031335bed9c01deb0b770fee3f3707907748b5d9a933ec77a34ca1317d48 +size 384 diff --git a/margin_logs/step_0000647.npy b/margin_logs/step_0000647.npy new file mode 100644 index 0000000..2974312 --- /dev/null +++ b/margin_logs/step_0000647.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd7b5da63680510aeed3841fe9be0a40f3efb4f1bb6c952bcb2d7bda390d7ba +size 384 diff --git a/margin_logs/step_0000648.npy b/margin_logs/step_0000648.npy new file mode 100644 index 0000000..f662faa --- /dev/null +++ b/margin_logs/step_0000648.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87865da3033e3c4bdf706b1ebf226d4ba74929a4d050abdc061e30e3856bd501 +size 384 diff --git a/margin_logs/step_0000649.npy b/margin_logs/step_0000649.npy new file mode 100644 index 0000000..6debf83 --- /dev/null +++ b/margin_logs/step_0000649.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdc65a208f997abd526ad8e8e340043b97815531d3455c50c32264aef4811c4 +size 384 diff --git a/margin_logs/step_0000650.npy b/margin_logs/step_0000650.npy new file mode 100644 index 0000000..c64bb56 --- /dev/null +++ b/margin_logs/step_0000650.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc0a18251980333eb61513f94ccfe60c0e91c7578c0c697c8317d7021ec3f50 +size 384 diff --git a/margin_logs/step_0000651.npy b/margin_logs/step_0000651.npy new file mode 100644 index 0000000..f4988a5 --- /dev/null +++ b/margin_logs/step_0000651.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fb6c17fe26c7a1de7b75d93bdad4fca681d50fbdcaae819f280a6cfdf953d4 +size 384 diff --git a/margin_logs/step_0000652.npy b/margin_logs/step_0000652.npy new file mode 100644 index 0000000..27400c3 --- /dev/null +++ b/margin_logs/step_0000652.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da33d08c6c28e80deb67c9c3eb5a03fcf5ad0f2699ac504b5f408a588ca910eb +size 384 diff --git a/margin_logs/step_0000653.npy b/margin_logs/step_0000653.npy new file mode 100644 index 0000000..078cd1d --- /dev/null +++ b/margin_logs/step_0000653.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e74cc044dad86337f29e125f1b44f907ab1f88be55c3e6a7343670c80758f10 +size 384 diff --git a/margin_logs/step_0000654.npy b/margin_logs/step_0000654.npy new file mode 100644 index 0000000..0212ba7 --- /dev/null +++ b/margin_logs/step_0000654.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62de38f4aadb7b0b7db03bcf8c018260449faaa82c96c3aebb22eba50fc6140 +size 384 diff --git a/margin_logs/step_0000655.npy b/margin_logs/step_0000655.npy new file mode 100644 index 0000000..d3be6fe --- /dev/null +++ b/margin_logs/step_0000655.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eafdeff580dff33e9b82adac4591f265fce5869347491f991c74b8ee5e088176 +size 384 diff --git a/margin_logs/step_0000656.npy b/margin_logs/step_0000656.npy new file mode 100644 index 0000000..b3deeaf --- /dev/null +++ b/margin_logs/step_0000656.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4324d5862c202438af5cc07b5d9306dcdd90b7d626666bd692889bfd4f9c9427 +size 384 diff --git a/margin_logs/step_0000657.npy b/margin_logs/step_0000657.npy new file mode 100644 index 0000000..72220b5 --- /dev/null +++ b/margin_logs/step_0000657.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c75117a8832a050a28dedf7ec3d260f3be7bae01feee60045486ddf2ce1ad53 +size 384 diff --git a/margin_logs/step_0000658.npy b/margin_logs/step_0000658.npy new file mode 100644 index 0000000..dc119a1 --- /dev/null +++ b/margin_logs/step_0000658.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a2c4e3287c3e84412059cb9279eb1e2911b6c3ba0163194dcfa2074dc1a848 +size 384 diff --git a/margin_logs/step_0000659.npy b/margin_logs/step_0000659.npy new file mode 100644 index 0000000..7600652 --- /dev/null +++ b/margin_logs/step_0000659.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ff98184e8937e5ec18125e5e1c9cfc0e209b2e49a2d76bd619ae9614a168a0 +size 384 diff --git a/margin_logs/step_0000660.npy b/margin_logs/step_0000660.npy new file mode 100644 index 0000000..d7420d9 --- /dev/null +++ b/margin_logs/step_0000660.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49625bd42e0b094a8c889df2d2561e0df9fc2a1e30169f0197c100aabbb9addd +size 384 diff --git a/margin_logs/step_0000661.npy b/margin_logs/step_0000661.npy new file mode 100644 index 0000000..f187c62 --- /dev/null +++ b/margin_logs/step_0000661.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a5e2e826a83be6018d2c980098b276a0a9c7d5a438dcbd8c2e7fc024504266 +size 384 diff --git a/margin_logs/step_0000662.npy b/margin_logs/step_0000662.npy new file mode 100644 index 0000000..f0f00a1 --- /dev/null +++ b/margin_logs/step_0000662.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c80a523896816f62d3327d50de1badb3e4358b08b17a9ffb5924e585451c35a +size 384 diff --git a/margin_logs/step_0000663.npy b/margin_logs/step_0000663.npy new file mode 100644 index 0000000..bd904c8 --- /dev/null +++ b/margin_logs/step_0000663.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba2f1e46d286ad28dc51c14147946baf1f5c801392c857363f16a90397e935e6 +size 384 diff --git a/margin_logs/step_0000664.npy b/margin_logs/step_0000664.npy new file mode 100644 index 0000000..4041fae --- /dev/null +++ b/margin_logs/step_0000664.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce08371f8f678ec79eaba16ca4a685856e40b3b682aeb1fcc3fdde4cddc90d5e +size 384 diff --git a/margin_logs/step_0000665.npy b/margin_logs/step_0000665.npy new file mode 100644 index 0000000..c2a6b8a --- /dev/null +++ b/margin_logs/step_0000665.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baff87e4ec0058be640f30cbee37dfe7deef0e177b4a6ae87044ed6c11c68a49 +size 384 diff --git a/margin_logs/step_0000666.npy b/margin_logs/step_0000666.npy new file mode 100644 index 0000000..e4a2883 --- /dev/null +++ b/margin_logs/step_0000666.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7fff4a8dd4e3b0405545c4400fc4499f79175f1270f39c5f512094f0f977c7 +size 384 diff --git a/margin_logs/step_0000667.npy b/margin_logs/step_0000667.npy new file mode 100644 index 0000000..3b5d9b6 --- /dev/null +++ b/margin_logs/step_0000667.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e8b4df3e71dc655c48f73d39fff70f5247da13d955cb186eedde0ed6ee6d57 +size 384 diff --git a/margin_logs/step_0000668.npy b/margin_logs/step_0000668.npy new file mode 100644 index 0000000..a47894b --- /dev/null +++ b/margin_logs/step_0000668.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa665e4b07ec69bea6c6ab3eb33e31673b6687865c52e911d17bfbadd9863b2a +size 384 diff --git a/margin_logs/step_0000669.npy b/margin_logs/step_0000669.npy new file mode 100644 index 0000000..d477b3b --- /dev/null +++ b/margin_logs/step_0000669.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303e87076b8bc99dcc21a5db50a9e2f585be29a298336a5e4d6e14ddd08ccbac +size 384 diff --git a/margin_logs/step_0000670.npy b/margin_logs/step_0000670.npy new file mode 100644 index 0000000..e58f6fd --- /dev/null +++ b/margin_logs/step_0000670.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c4dcc67dbeaa64fa62c2f2e04e800bdfe28c096eadad27586a5ec82dcba8fa +size 384 diff --git a/margin_logs/step_0000671.npy b/margin_logs/step_0000671.npy new file mode 100644 index 0000000..098feef --- /dev/null +++ b/margin_logs/step_0000671.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1901e537f35f03c632bc08d27be4c46896fd2be60bf6ab4df390be5d9991649 +size 384 diff --git a/margin_logs/step_0000672.npy b/margin_logs/step_0000672.npy new file mode 100644 index 0000000..f5cc1f5 --- /dev/null +++ b/margin_logs/step_0000672.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d80de04b7e79f0ab272441690c2b9ea3cb886844854cb802f852224f656fe6 +size 384 diff --git a/margin_logs/step_0000673.npy b/margin_logs/step_0000673.npy new file mode 100644 index 0000000..76fd22d --- /dev/null +++ b/margin_logs/step_0000673.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f15f952fb4d978470c94aca305bdf653c16c8b5835ebe9c1ae464735c6ee482 +size 384 diff --git a/margin_logs/step_0000674.npy b/margin_logs/step_0000674.npy new file mode 100644 index 0000000..3936ace --- /dev/null +++ b/margin_logs/step_0000674.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7c68bf9b0a9872cdb0aebfbbcb9c25c76355b5dde1559a265313cc5f9c0b29 +size 384 diff --git a/margin_logs/step_0000675.npy b/margin_logs/step_0000675.npy new file mode 100644 index 0000000..ac04d5e --- /dev/null +++ b/margin_logs/step_0000675.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c429b3dddeb547022ebef57bb606eaf457c6fe79d3bc2f84252ae770cfe98c +size 384 diff --git a/margin_logs/step_0000676.npy b/margin_logs/step_0000676.npy new file mode 100644 index 0000000..919e24b --- /dev/null +++ b/margin_logs/step_0000676.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5519d834a07ebcb460071a7f46fc9d7fcd2f9b79cc8e17ffb88581bfa3077958 +size 384 diff --git a/margin_logs/step_0000677.npy b/margin_logs/step_0000677.npy new file mode 100644 index 0000000..f608904 --- /dev/null +++ b/margin_logs/step_0000677.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32a1290cb9c299be43be759b94424cb4c2fa739ebd191cb2c5a2fe1be0dcfba6 +size 384 diff --git a/margin_logs/step_0000678.npy b/margin_logs/step_0000678.npy new file mode 100644 index 0000000..6fd80d8 --- /dev/null +++ b/margin_logs/step_0000678.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f90778579a50c38f661c148fd3884e92616573e158e51fb8fdd1c2c5ef89d7d +size 384 diff --git a/margin_logs/step_0000679.npy b/margin_logs/step_0000679.npy new file mode 100644 index 0000000..74299c6 --- /dev/null +++ b/margin_logs/step_0000679.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4a7479b21fd8d003889144880365347b4630f333438311ed0afcc37560769d +size 384 diff --git a/margin_logs/step_0000680.npy b/margin_logs/step_0000680.npy new file mode 100644 index 0000000..745e1ef --- /dev/null +++ b/margin_logs/step_0000680.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a09da02cdbbe851ad7f31d0b9cca15f951a24b2c918f10901f1781b4bafbb0 +size 384 diff --git a/margin_logs/step_0000681.npy b/margin_logs/step_0000681.npy new file mode 100644 index 0000000..ca4123b --- /dev/null +++ b/margin_logs/step_0000681.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414f6a39d3f986c071a6f744d1ffa2a76d23b4162d0bc0b1cbc183a8272ff6a9 +size 384 diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..05d676a --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b74a2501f68106448d6ae2035ca725eb0cfe549ebf25edc73cb161fdc5e253 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..c76436f --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2438831d24546769477926039d4251a6f227481f9d7881a44255addb1fbd1fd +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..5d3ad7b --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c11182b9ed149b023defb0706d7a8f6674b17e0ecb76f9dcaa9e5fed3353fa +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..36bfc56 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:913a68a85eed735cf9ebb9ad97774287bd4a0fde46baac52bbf4781263735865 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..becc044 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fff1f01742d71e43ce500184e617a8762322a362be279cdc4c9463e395cc548 +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..8a4165f --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eeb24d4958565227ce30af96b98ec87006cfb9c5b11df3e75b4ed874da7f936 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..01941d5 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ff9ff26fcc538148183ac000b0867ad1a024575f6138dcb2690ddd330dccae +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.log b/train.log new file mode 100644 index 0000000..940463e --- /dev/null +++ b/train.log @@ -0,0 +1,1739 @@ +2026-04-17 21:24:20 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8') +2026-04-17 21:24:20 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train', 'test'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, preprocessing_log_samples=0, preprocessing_log_dir=None) +2026-04-17 21:24:20 - INFO - __main__ - Training/evaluation parameters MarginDPOConfig( +_n_gpu=1, +accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.999, +adam_epsilon=1e-08, +auto_find_batch_size=False, +average_tokens_across_devices=False, +batch_eval_metrics=False, +beta=0.1, +bf16=True, +bf16_full_eval=False, +data_seed=None, +dataloader_drop_last=True, +dataloader_num_workers=0, +dataloader_persistent_workers=False, +dataloader_pin_memory=True, +dataloader_prefetch_factor=None, +dataset_num_proc=12, +ddp_backend=None, +ddp_broadcast_buffers=None, +ddp_bucket_cap_mb=None, +ddp_find_unused_parameters=None, +ddp_timeout=1800, +debug=[], +deepspeed=None, +disable_dropout=True, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=False, +eval_accumulation_steps=None, +eval_delay=0, +eval_do_concat_batches=True, +eval_on_start=False, +eval_steps=100, +eval_strategy=IntervalStrategy.STEPS, +eval_use_gather_object=False, +f_alpha_divergence_coef=1.0, +f_divergence_type=reverse_kl, +force_use_ref_model=False, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +fsdp=[], +fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, +fsdp_min_num_params=0, +fsdp_transformer_layer_cls_to_wrap=None, +full_determinism=False, +generate_during_eval=False, +gradient_accumulation_steps=2, +gradient_checkpointing=True, +gradient_checkpointing_kwargs={'use_reentrant': False}, +greater_is_better=None, +group_by_length=False, +half_precision_backend=auto, +hub_always_push=False, +hub_margin_dataset_id=W-61/llama-3-8b-base-margin-dpo-hh-helpful-margin-log, +hub_model_id=W-61/llama-3-8b-base-margin-dpo-hh-helpful, +hub_model_revision=main, +hub_private_repo=None, +hub_strategy=HubStrategy.EVERY_SAVE, +hub_token=, +ignore_data_skip=False, +include_for_metrics=[], +include_inputs_for_metrics=False, +include_num_input_tokens_seen=False, +include_tokens_per_second=False, +is_encoder_decoder=None, +jit_mode_eval=False, +label_names=None, +label_pad_token_id=-100, +label_smoothing=0.0, +label_smoothing_factor=0.0, +learning_rate=5e-07, +length_column_name=length, +load_best_model_at_end=False, +local_rank=0, +log_level=info, +log_level_replica=warning, +log_on_each_node=True, +logging_dir=outputs/llama-3-8b-base-margin-dpo-hh-helpful/runs/Apr17_21-24-20_d4052, +logging_first_step=True, +logging_nan_inf_filter=True, +logging_steps=1, +logging_strategy=IntervalStrategy.STEPS, +loss_type=sigmoid, +lr_scheduler_kwargs={}, +lr_scheduler_type=SchedulerType.COSINE, +margin_dataset_private=None, +margin_dataset_split=train, +margin_log_path=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs, +margin_log_steps=1, +margin_save_full=True, +max_grad_norm=1.0, +max_length=512, +max_prompt_length=256, +max_steps=-1, +max_target_length=None, +metric_for_best_model=None, +model_adapter_name=None, +model_init_kwargs=None, +mp_parameters=, +neftune_noise_alpha=None, +no_cuda=False, +non_finite_logits_handling=error, +num_train_epochs=1, +optim=OptimizerNames.ADAMW_TORCH, +optim_args=None, +optim_target_modules=None, +output_dir=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312, +overwrite_output_dir=False, +padding_value=None, +past_index=-1, +per_device_eval_batch_size=8, +per_device_train_batch_size=8, +post_tokenization_log_dir=None, +post_tokenization_log_samples=0, +precompute_ref_batch_size=None, +precompute_ref_eval_batch_size=None, +precompute_ref_log_probs=False, +prediction_loss_only=False, +push_margin_dataset=False, +push_to_hub=False, +push_to_hub_model_id=None, +push_to_hub_organization=None, +push_to_hub_token=, +ray_scope=last, +ref_adapter_name=None, +ref_model_init_kwargs=None, +ref_model_mixup_alpha=0.9, +ref_model_sync_steps=64, +reference_free=False, +remove_unused_columns=False, +report_to=['wandb'], +require_explicit_ref_model=True, +restore_callback_states_from_checkpoint=False, +resume_from_checkpoint=None, +reuse_tokenized_dataset=True, +rpo_alpha=None, +run_name=llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312, +save_on_each_node=False, +save_only_model=False, +save_safetensors=True, +save_steps=200, +save_strategy=SaveStrategy.STEPS, +save_total_limit=2, +seed=42, +sft_weight=0.0, +skip_memory_metrics=True, +sync_ref_model=False, +tf32=None, +tokenization_batch_size=128, +tokenization_mode=online, +tokenized_dataset_cache_dir=/scratch/feng.yulu/dynamic-dpo-v4/tokenized_preferences, +torch_compile=False, +torch_compile_backend=None, +torch_compile_mode=None, +torch_empty_cache_steps=None, +torchdynamo=None, +tp_size=0, +tpu_metrics_debug=False, +tpu_num_cores=None, +trainer_type=margin_dpo, +truncation_mode=keep_end, +use_cpu=False, +use_ipex=False, +use_legacy_prediction_loop=False, +use_liger_kernel=False, +use_mps_device=False, +wandb_project=ood-run-4xh200, +warmup_ratio=0.1, +warmup_steps=0, +weight_decay=0.0, +) +2026-04-17 21:24:20 - INFO - __main__ - W&B project: ood-run-4xh200 +2026-04-17 21:24:20 - INFO - __main__ - Margin-DPO parameters: beta=0.1, f_divergence_type=reverse_kl, margin_log_steps=1 +2026-04-17 21:24:20 - INFO - __main__ - Using persistent HF datasets cache at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets +2026-04-17 21:24:23 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.). + Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00> loading file tokenizer.json +[INFO|tokenization_utils_base.py:2058] 2026-04-17 21:24:29,059 >> loading file tokenizer.model +[INFO|tokenization_utils_base.py:2058] 2026-04-17 21:24:29,059 >> loading file added_tokens.json +[INFO|tokenization_utils_base.py:2058] 2026-04-17 21:24:29,059 >> loading file special_tokens_map.json +[INFO|tokenization_utils_base.py:2058] 2026-04-17 21:24:29,059 >> loading file tokenizer_config.json +[INFO|tokenization_utils_base.py:2058] 2026-04-17 21:24:29,059 >> loading file chat_template.jinja +[INFO|tokenization_utils_base.py:2323] 2026-04-17 21:24:29,510 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. +2026-04-17 21:24:30 - INFO - __main__ - Processed train sample 41905: + +Prompt: +<|begin_of_text|><|start_header_id|>user<|end_header_id|> + +What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|> + +Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|> + +What are your ideas?<|eot_id|> + +Chosen: +<|start_header_id|>assistant<|end_header_id|> + +Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|> + +Rejected: +<|start_header_id|>assistant<|end_header_id|> + +I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|> +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you. + warnings.warn( +[INFO|configuration_utils.py:691] 2026-04-17 21:24:30,161 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/config.json +[INFO|configuration_utils.py:765] 2026-04-17 21:24:30,180 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-17 21:24:31,292 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-17 21:24:31,297 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[WARNING|logging.py:328] 2026-04-17 21:24:31,299 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[WARNING|logging.py:328] 2026-04-17 21:24:31,299 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[WARNING|logging.py:328] 2026-04-17 21:24:31,299 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +[INFO|configuration_utils.py:1142] 2026-04-17 21:24:31,300 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + +[WARNING|logging.py:328] 2026-04-17 21:24:31,300 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + Loading checkpoint shards: 0%| | 0/7 [00:00> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:821] 2026-04-17 21:24:31,560 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:821] 2026-04-17 21:24:31,560 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. + Loading checkpoint shards: 14%|███████▊ | 1/7 [00:17<01:42, 17.09s/it] Loading checkpoint shards: 29%|███████████████▋ | 2/7 [00:32<01:21, 16.32s/it] Loading checkpoint shards: 43%|███████████████████████▌ | 3/7 [00:44<00:56, 14.15s/it] Loading checkpoint shards: 57%|███████████████████████████████▍ | 4/7 [00:59<00:43, 14.35s/it] Loading checkpoint shards: 71%|███████████████████████████████████████▎ | 5/7 [01:11<00:26, 13.48s/it] Loading checkpoint shards: 86%|███████████████████████████████████████████████▏ | 6/7 [01:25<00:13, 13.67s/it] Loading checkpoint shards: 100%|███████████████████████████████████████████████████████| 7/7 [01:32<00:00, 11.56s/it] Loading checkpoint shards: 100%|███████████████████████████████████████████████████████| 7/7 [01:32<00:00, 13.19s/it] +[INFO|modeling_utils.py:4926] 2026-04-17 21:26:03,755 >> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-17 21:26:03,755 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-17 21:26:03,759 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-17 21:26:03,759 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[INFO|configuration_utils.py:691] 2026-04-17 21:26:03,761 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/config.json +[INFO|configuration_utils.py:765] 2026-04-17 21:26:03,761 >> Model config LlamaConfig { + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": false, + "vocab_size": 128256 +} + +[INFO|modeling_utils.py:1121] 2026-04-17 21:26:03,765 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/model.safetensors.index.json +[INFO|modeling_utils.py:2167] 2026-04-17 21:26:03,765 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16. +[INFO|configuration_utils.py:1142] 2026-04-17 21:26:03,771 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "eos_token_id": 128001, + "use_cache": false +} + + Loading checkpoint shards: 0%| | 0/7 [00:00> All model checkpoint weights were used when initializing LlamaForCausalLM. + +[INFO|modeling_utils.py:4934] 2026-04-17 21:26:15,141 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101. +If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training. +[INFO|configuration_utils.py:1095] 2026-04-17 21:26:15,145 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-4xh200-batch-64-20260416-162101/generation_config.json +[INFO|configuration_utils.py:1142] 2026-04-17 21:26:15,145 >> Generate config GenerationConfig { + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9 +} + +[WARNING|trainer.py:821] 2026-04-17 21:26:15,146 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:15,147 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:15,408 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:15,481 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:15,553 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:521: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-17 21:26:17,411 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,412 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,415 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,421 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,421 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,423 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,423 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,426 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,426 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +[WARNING|trainer.py:816] 2026-04-17 21:26:17,428 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:521: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-17 21:26:17,429 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:521: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[WARNING|trainer.py:816] 2026-04-17 21:26:17,431 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead. +/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:521: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead. + super().__init__( +[INFO|trainer.py:748] 2026-04-17 21:26:18,110 >> Using auto half precision backend +/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight. + warnings.warn( +/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight. + warnings.warn( +/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints. + warnings.warn( +[INFO|trainer.py:2414] 2026-04-17 21:26:22,444 >> ***** Running training ***** +[INFO|trainer.py:2415] 2026-04-17 21:26:22,444 >> Num examples = 43,598 +[INFO|trainer.py:2416] 2026-04-17 21:26:22,444 >> Num Epochs = 1 +[INFO|trainer.py:2417] 2026-04-17 21:26:22,444 >> Instantaneous batch size per device = 8 +[INFO|trainer.py:2420] 2026-04-17 21:26:22,444 >> Total train batch size (w. parallel, distributed & accumulation) = 64 +[INFO|trainer.py:2421] 2026-04-17 21:26:22,444 >> Gradient Accumulation steps = 2 +[INFO|trainer.py:2422] 2026-04-17 21:26:22,444 >> Total optimization steps = 681 +[INFO|trainer.py:2423] 2026-04-17 21:26:22,445 >> Number of trainable parameters = 2,007,565,312 +[INFO|integration_utils.py:831] 2026-04-17 21:26:22,445 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" +wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin +wandb: wandb version 0.26.0 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +wandb: Tracking run with wandb version 0.17.5 +wandb: Run data is saved locally in /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260417_212625-f4hzpnwr +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 +wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/ood-run-4xh200 +wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/ood-run-4xh200/runs/f4hzpnwr + 0%| | 0/681 [00:00> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-17 21:26:33,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-17 21:26:33,470 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed +[WARNING|modeling_utils.py:1713] 2026-04-17 21:26:33,471 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed + 0%| | 1/681 [00:04<51:18, 4.53s/it] {'loss': 1.389, 'grad_norm': 83.525146484375, 'learning_rate': 0.0, 'margin_dpo/margin_mean': -0.02287048101425171, 'margin_dpo/margin_std': 0.41920793056488037, 'logps/chosen': -50.1435661315918, 'logps/rejected': -74.09991455078125, 'logps/ref_chosen': -50.14883804321289, 'logps/ref_rejected': -74.1280517578125, 'logits/chosen': -0.4974287748336792, 'logits/rejected': -0.43299180269241333, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': -0.02287006378173828, 'margin_dpo/beta_margin_mean': -0.0022870064713060856, 'margin_dpo/beta_margin_std': 0.0420234240591526, 'margin_dpo/beta_margin_grad_mean': -0.5005706548690796, 'margin_dpo/beta_margin_grad_std': 0.010499694384634495, 'epoch': 0.0} + 0%| | 1/681 [00:04<51:18, 4.53s/it] 0%|▏ | 2/681 [00:07<42:19, 3.74s/it] {'loss': 1.3932, 'grad_norm': 72.20420837402344, 'learning_rate': 7.246376811594203e-09, 'margin_dpo/margin_mean': -0.06572240591049194, 'margin_dpo/margin_std': 0.35048407316207886, 'logps/chosen': -52.65569305419922, 'logps/rejected': -75.27340698242188, 'logps/ref_chosen': -52.620704650878906, 'logps/ref_rejected': -75.30413818359375, 'logits/chosen': -0.4953641891479492, 'logits/rejected': -0.4594460129737854, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': -0.06572261452674866, 'margin_dpo/beta_margin_mean': -0.006572261452674866, 'margin_dpo/beta_margin_std': 0.03523966670036316, 'margin_dpo/beta_margin_grad_mean': -0.5016425848007202, 'margin_dpo/beta_margin_grad_std': 0.008806563913822174, 'epoch': 0.0} + 0%|▏ | 2/681 [00:07<42:19, 3.74s/it] 0%|▎ | 3/681 [00:10<39:41, 3.51s/it] {'loss': 1.3882, 'grad_norm': 70.93851470947266, 'learning_rate': 1.4492753623188406e-08, 'margin_dpo/margin_mean': -0.01640373468399048, 'margin_dpo/margin_std': 0.33020099997520447, 'logps/chosen': -60.9985466003418, 'logps/rejected': -68.67314147949219, 'logps/ref_chosen': -60.98159408569336, 'logps/ref_rejected': -68.67259216308594, 'logits/chosen': -0.4816606044769287, 'logits/rejected': -0.44218793511390686, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': -0.01640462875366211, 'margin_dpo/beta_margin_mean': -0.001640463131479919, 'margin_dpo/beta_margin_std': 0.03315068036317825, 'margin_dpo/beta_margin_grad_mean': -0.5004101395606995, 'margin_dpo/beta_margin_grad_std': 0.008285283111035824, 'epoch': 0.0} + 0%|▎ | 3/681 [00:11<39:41, 3.51s/it] 1%|▍ | 4/681 [00:14<39:12, 3.47s/it] {'loss': 1.3857, 'grad_norm': 71.9634780883789, 'learning_rate': 2.1739130434782606e-08, 'margin_dpo/margin_mean': 0.0101853609085083, 'margin_dpo/margin_std': 0.40629148483276367, 'logps/chosen': -56.74000930786133, 'logps/rejected': -86.62959289550781, 'logps/ref_chosen': -56.76771545410156, 'logps/ref_rejected': -86.64710998535156, 'logits/chosen': -0.4688633680343628, 'logits/rejected': -0.4411826729774475, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.01018574833869934, 'margin_dpo/beta_margin_mean': 0.0010185746941715479, 'margin_dpo/beta_margin_std': 0.04087061062455177, 'margin_dpo/beta_margin_grad_mean': -0.49974533915519714, 'margin_dpo/beta_margin_grad_std': 0.010213336907327175, 'epoch': 0.01} + 1%|▍ | 4/681 [00:14<39:12, 3.47s/it] 1%|▌ | 5/681 [00:17<37:45, 3.35s/it] {'loss': 1.3838, 'grad_norm': 89.44969940185547, 'learning_rate': 2.898550724637681e-08, 'margin_dpo/margin_mean': 0.02979910373687744, 'margin_dpo/margin_std': 0.4284527897834778, 'logps/chosen': -53.81106185913086, 'logps/rejected': -84.13066864013672, 'logps/ref_chosen': -53.859375, 'logps/ref_rejected': -84.14918518066406, 'logits/chosen': -0.5144953727722168, 'logits/rejected': -0.4707370400428772, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.029798835515975952, 'margin_dpo/beta_margin_mean': 0.0029798836912959814, 'margin_dpo/beta_margin_std': 0.043392810970544815, 'margin_dpo/beta_margin_grad_mean': -0.49925631284713745, 'margin_dpo/beta_margin_grad_std': 0.010840461589396, 'epoch': 0.01} + 1%|▌ | 5/681 [00:17<37:45, 3.35s/it] 1%|▋ | 6/681 [00:19<34:05, 3.03s/it] {'loss': 1.3862, 'grad_norm': 91.85087585449219, 'learning_rate': 3.6231884057971014e-08, 'margin_dpo/margin_mean': 0.0043981969356536865, 'margin_dpo/margin_std': 0.37970417737960815, 'logps/chosen': -63.01681137084961, 'logps/rejected': -92.65907287597656, 'logps/ref_chosen': -63.007484436035156, 'logps/ref_rejected': -92.64534759521484, 'logits/chosen': -0.5226503610610962, 'logits/rejected': -0.48189258575439453, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.004398524761199951, 'margin_dpo/beta_margin_mean': 0.00043985259253531694, 'margin_dpo/beta_margin_std': 0.03865039348602295, 'margin_dpo/beta_margin_grad_mean': -0.499889999628067, 'margin_dpo/beta_margin_grad_std': 0.009657730348408222, 'epoch': 0.01} + 1%|▋ | 6/681 [00:19<34:05, 3.03s/it] 1%|▊ | 7/681 [00:22<31:50, 2.83s/it] {'loss': 1.3851, 'grad_norm': 82.43697357177734, 'learning_rate': 4.347826086956521e-08, 'margin_dpo/margin_mean': 0.01658591628074646, 'margin_dpo/margin_std': 0.4064858555793762, 'logps/chosen': -57.743560791015625, 'logps/rejected': -103.90592193603516, 'logps/ref_chosen': -57.774818420410156, 'logps/ref_rejected': -103.92059326171875, 'logits/chosen': -0.5088996887207031, 'logits/rejected': -0.4749848246574402, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.016585499048233032, 'margin_dpo/beta_margin_mean': 0.0016585501143708825, 'margin_dpo/beta_margin_std': 0.04097241163253784, 'margin_dpo/beta_margin_grad_mean': -0.4995860159397125, 'margin_dpo/beta_margin_grad_std': 0.01023741252720356, 'epoch': 0.01} + 1%|▊ | 7/681 [00:22<31:50, 2.83s/it] 1%|▉ | 8/681 [00:24<30:29, 2.72s/it] {'loss': 1.3896, 'grad_norm': 79.04316711425781, 'learning_rate': 5.0724637681159424e-08, 'margin_dpo/margin_mean': -0.028907448053359985, 'margin_dpo/margin_std': 0.37828418612480164, 'logps/chosen': -58.70497512817383, 'logps/rejected': -79.27145385742188, 'logps/ref_chosen': -58.716033935546875, 'logps/ref_rejected': -79.3114242553711, 'logits/chosen': -0.5012874007225037, 'logits/rejected': -0.4746849238872528, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': -0.028907686471939087, 'margin_dpo/beta_margin_mean': -0.0028907686937600374, 'margin_dpo/beta_margin_std': 0.038289591670036316, 'margin_dpo/beta_margin_grad_mean': -0.5007215142250061, 'margin_dpo/beta_margin_grad_std': 0.009568445384502411, 'epoch': 0.01} + 1%|▉ | 8/681 [00:24<30:29, 2.72s/it] 1%|█ | 9/681 [00:27<30:35, 2.73s/it] {'loss': 1.3856, 'grad_norm': 85.21879577636719, 'learning_rate': 5.797101449275362e-08, 'margin_dpo/margin_mean': 0.011951416730880737, 'margin_dpo/margin_std': 0.4246274530887604, 'logps/chosen': -69.87384033203125, 'logps/rejected': -99.62161254882812, 'logps/ref_chosen': -69.8668441772461, 'logps/ref_rejected': -99.6026611328125, 'logits/chosen': -0.4914604127407074, 'logits/rejected': -0.44458478689193726, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.011951535940170288, 'margin_dpo/beta_margin_mean': 0.0011951536871492863, 'margin_dpo/beta_margin_std': 0.04292509704828262, 'margin_dpo/beta_margin_grad_mean': -0.49970191717147827, 'margin_dpo/beta_margin_grad_std': 0.010726687498390675, 'epoch': 0.01} + 1%|█ | 9/681 [00:27<30:35, 2.73s/it] 1%|█▏ | 10/681 [00:30<30:39, 2.74s/it] {'loss': 1.3808, 'grad_norm': 70.79057312011719, 'learning_rate': 6.521739130434782e-08, 'margin_dpo/margin_mean': 0.05922728776931763, 'margin_dpo/margin_std': 0.425285279750824, 'logps/chosen': -48.30955505371094, 'logps/rejected': -80.38316345214844, 'logps/ref_chosen': -48.35768508911133, 'logps/ref_rejected': -80.37206268310547, 'logits/chosen': -0.5021112561225891, 'logits/rejected': -0.45928800106048584, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.05922754108905792, 'margin_dpo/beta_margin_mean': 0.005922754295170307, 'margin_dpo/beta_margin_std': 0.04276762157678604, 'margin_dpo/beta_margin_grad_mean': -0.4985186755657196, 'margin_dpo/beta_margin_grad_std': 0.010679498314857483, 'epoch': 0.01} + 1%|█▏ | 10/681 [00:30<30:39, 2.74s/it] 2%|█▎ | 11/681 [00:33<30:48, 2.76s/it] {'loss': 1.382, 'grad_norm': 68.34065246582031, 'learning_rate': 7.246376811594203e-08, 'margin_dpo/margin_mean': 0.04697957634925842, 'margin_dpo/margin_std': 0.3766877055168152, 'logps/chosen': -52.98234558105469, 'logps/rejected': -87.7928466796875, 'logps/ref_chosen': -53.01685333251953, 'logps/ref_rejected': -87.78038024902344, 'logits/chosen': -0.46157172322273254, 'logits/rejected': -0.4366176128387451, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.04697933793067932, 'margin_dpo/beta_margin_mean': 0.004697933793067932, 'margin_dpo/beta_margin_std': 0.03841574117541313, 'margin_dpo/beta_margin_grad_mean': -0.4988263249397278, 'margin_dpo/beta_margin_grad_std': 0.009599917568266392, 'epoch': 0.02} + 2%|█▎ | 11/681 [00:33<30:48, 2.76s/it] 2%|█▍ | 12/681 [00:35<30:31, 2.74s/it] {'loss': 1.383, 'grad_norm': 90.25657653808594, 'learning_rate': 7.971014492753623e-08, 'margin_dpo/margin_mean': 0.03697209060192108, 'margin_dpo/margin_std': 0.3801400065422058, 'logps/chosen': -61.82605743408203, 'logps/rejected': -104.91586303710938, 'logps/ref_chosen': -61.80543518066406, 'logps/ref_rejected': -104.85826873779297, 'logits/chosen': -0.5372684001922607, 'logits/rejected': -0.5010780096054077, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.036972567439079285, 'margin_dpo/beta_margin_mean': 0.003697256790474057, 'margin_dpo/beta_margin_std': 0.03862835466861725, 'margin_dpo/beta_margin_grad_mean': -0.49907633662223816, 'margin_dpo/beta_margin_grad_std': 0.009649958461523056, 'epoch': 0.02} + 2%|█▍ | 12/681 [00:35<30:31, 2.74s/it] 2%|█▌ | 13/681 [00:38<30:59, 2.78s/it] {'loss': 1.3865, 'grad_norm': 79.32652282714844, 'learning_rate': 8.695652173913042e-08, 'margin_dpo/margin_mean': 0.0019735991954803467, 'margin_dpo/margin_std': 0.4049326777458191, 'logps/chosen': -64.28887176513672, 'logps/rejected': -87.23356628417969, 'logps/ref_chosen': -64.26036071777344, 'logps/ref_rejected': -87.20307922363281, 'logits/chosen': -0.4902585744857788, 'logits/rejected': -0.46292757987976074, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.0019729435443878174, 'margin_dpo/beta_margin_mean': 0.00019729437190108, 'margin_dpo/beta_margin_std': 0.04214153066277504, 'margin_dpo/beta_margin_grad_mean': -0.49995219707489014, 'margin_dpo/beta_margin_grad_std': 0.010526234284043312, 'epoch': 0.02} + 2%|█▌ | 13/681 [00:38<30:59, 2.78s/it] 2%|█▌ | 14/681 [00:41<30:21, 2.73s/it] {'loss': 1.3863, 'grad_norm': 85.4604263305664, 'learning_rate': 9.420289855072464e-08, 'margin_dpo/margin_mean': 0.005887240171432495, 'margin_dpo/margin_std': 0.47125041484832764, 'logps/chosen': -58.152305603027344, 'logps/rejected': -104.09505462646484, 'logps/ref_chosen': -58.11021423339844, 'logps/ref_rejected': -104.04708099365234, 'logits/chosen': -0.489965558052063, 'logits/rejected': -0.4511108696460724, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.005887240171432495, 'margin_dpo/beta_margin_mean': 0.000588723982218653, 'margin_dpo/beta_margin_std': 0.047432418912649155, 'margin_dpo/beta_margin_grad_mean': -0.4998512864112854, 'margin_dpo/beta_margin_grad_std': 0.011847623623907566, 'epoch': 0.02} + 2%|█▌ | 14/681 [00:41<30:21, 2.73s/it] 2%|█▋ | 15/681 [00:43<30:04, 2.71s/it] {'loss': 1.3824, 'grad_norm': 64.13221740722656, 'learning_rate': 1.0144927536231885e-07, 'margin_dpo/margin_mean': 0.042571812868118286, 'margin_dpo/margin_std': 0.39672398567199707, 'logps/chosen': -56.97354507446289, 'logps/rejected': -80.85784912109375, 'logps/ref_chosen': -56.96691131591797, 'logps/ref_rejected': -80.80863952636719, 'logits/chosen': -0.46068376302719116, 'logits/rejected': -0.44027313590049744, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.042571574449539185, 'margin_dpo/beta_margin_mean': 0.0042571574449539185, 'margin_dpo/beta_margin_std': 0.03996788337826729, 'margin_dpo/beta_margin_grad_mean': -0.49893510341644287, 'margin_dpo/beta_margin_grad_std': 0.009985481388866901, 'epoch': 0.02} + 2%|█▋ | 15/681 [00:44<30:04, 2.71s/it] 2%|█▊ | 16/681 [00:46<29:30, 2.66s/it] {'loss': 1.3848, 'grad_norm': 84.14559173583984, 'learning_rate': 1.0869565217391303e-07, 'margin_dpo/margin_mean': 0.01766011118888855, 'margin_dpo/margin_std': 0.3431432843208313, 'logps/chosen': -61.73296356201172, 'logps/rejected': -84.38020324707031, 'logps/ref_chosen': -61.739891052246094, 'logps/ref_rejected': -84.36947631835938, 'logits/chosen': -0.52532559633255, 'logits/rejected': -0.4843023419380188, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.017660528421401978, 'margin_dpo/beta_margin_mean': 0.001766052795574069, 'margin_dpo/beta_margin_std': 0.03466500714421272, 'margin_dpo/beta_margin_grad_mean': -0.49955853819847107, 'margin_dpo/beta_margin_grad_std': 0.008663349784910679, 'epoch': 0.02} + 2%|█▊ | 16/681 [00:46<29:30, 2.66s/it] 2%|█▉ | 17/681 [00:49<29:03, 2.63s/it] {'loss': 1.3816, 'grad_norm': 78.68696594238281, 'learning_rate': 1.1594202898550725e-07, 'margin_dpo/margin_mean': 0.04995712637901306, 'margin_dpo/margin_std': 0.3325832486152649, 'logps/chosen': -67.70388793945312, 'logps/rejected': -85.42217254638672, 'logps/ref_chosen': -67.71033477783203, 'logps/ref_rejected': -85.37865447998047, 'logits/chosen': -0.5094451308250427, 'logits/rejected': -0.4733882546424866, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.0499575138092041, 'margin_dpo/beta_margin_mean': 0.0049957516603171825, 'margin_dpo/beta_margin_std': 0.034035272896289825, 'margin_dpo/beta_margin_grad_mean': -0.49875107407569885, 'margin_dpo/beta_margin_grad_std': 0.008506165817379951, 'epoch': 0.02} + 2%|█▉ | 17/681 [00:49<29:03, 2.63s/it] 3%|██ | 18/681 [00:51<29:00, 2.62s/it] {'loss': 1.3794, 'grad_norm': 81.91975402832031, 'learning_rate': 1.2318840579710146e-07, 'margin_dpo/margin_mean': 0.0720413327217102, 'margin_dpo/margin_std': 0.3442285656929016, 'logps/chosen': -47.723114013671875, 'logps/rejected': -75.5279541015625, 'logps/ref_chosen': -47.7394905090332, 'logps/ref_rejected': -75.4722900390625, 'logits/chosen': -0.4996645152568817, 'logits/rejected': -0.4448869228363037, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.07204136252403259, 'margin_dpo/beta_margin_mean': 0.007204136345535517, 'margin_dpo/beta_margin_std': 0.03471643477678299, 'margin_dpo/beta_margin_grad_mean': -0.49819934368133545, 'margin_dpo/beta_margin_grad_std': 0.008676947094500065, 'epoch': 0.03} + 3%|██ | 18/681 [00:51<29:00, 2.62s/it] 3%|██▏ | 19/681 [00:54<28:57, 2.62s/it] {'loss': 1.3833, 'grad_norm': 73.45258331298828, 'learning_rate': 1.3043478260869563e-07, 'margin_dpo/margin_mean': 0.03309273719787598, 'margin_dpo/margin_std': 0.3704480528831482, 'logps/chosen': -70.22134399414062, 'logps/rejected': -89.80667114257812, 'logps/ref_chosen': -70.20535278320312, 'logps/ref_rejected': -89.75758361816406, 'logits/chosen': -0.5062457323074341, 'logits/rejected': -0.45754408836364746, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.033092111349105835, 'margin_dpo/beta_margin_mean': 0.003309211228042841, 'margin_dpo/beta_margin_std': 0.03776707127690315, 'margin_dpo/beta_margin_grad_mean': -0.49917319416999817, 'margin_dpo/beta_margin_grad_std': 0.009437629953026772, 'epoch': 0.03} + 3%|██▏ | 19/681 [00:54<28:57, 2.62s/it] 3%|██▎ | 20/681 [00:57<29:07, 2.64s/it] {'loss': 1.3825, 'grad_norm': 73.92622375488281, 'learning_rate': 1.3768115942028986e-07, 'margin_dpo/margin_mean': 0.0407865047454834, 'margin_dpo/margin_std': 0.29486507177352905, 'logps/chosen': -50.828826904296875, 'logps/rejected': -78.88971710205078, 'logps/ref_chosen': -50.80324172973633, 'logps/ref_rejected': -78.8233413696289, 'logits/chosen': -0.5687921643257141, 'logits/rejected': -0.5141441226005554, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.040786296129226685, 'margin_dpo/beta_margin_mean': 0.004078629892319441, 'margin_dpo/beta_margin_std': 0.03044736012816429, 'margin_dpo/beta_margin_grad_mean': -0.4989803433418274, 'margin_dpo/beta_margin_grad_std': 0.007609857711941004, 'epoch': 0.03} + 3%|██▎ | 20/681 [00:57<29:07, 2.64s/it] 3%|██▍ | 21/681 [00:59<28:46, 2.62s/it] {'loss': 1.375, 'grad_norm': 77.78363037109375, 'learning_rate': 1.4492753623188405e-07, 'margin_dpo/margin_mean': 0.11629366874694824, 'margin_dpo/margin_std': 0.34371477365493774, 'logps/chosen': -50.0500373840332, 'logps/rejected': -77.97210693359375, 'logps/ref_chosen': -50.063018798828125, 'logps/ref_rejected': -77.86878967285156, 'logits/chosen': -0.49086394906044006, 'logits/rejected': -0.4666551351547241, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.1162932813167572, 'margin_dpo/beta_margin_mean': 0.01162932813167572, 'margin_dpo/beta_margin_std': 0.03486839681863785, 'margin_dpo/beta_margin_grad_mean': -0.4970940947532654, 'margin_dpo/beta_margin_grad_std': 0.008713486604392529, 'epoch': 0.03} + 3%|██▍ | 21/681 [00:59<28:46, 2.62s/it] 3%|██▌ | 22/681 [01:02<30:20, 2.76s/it] {'loss': 1.3615, 'grad_norm': 84.3017349243164, 'learning_rate': 1.5217391304347825e-07, 'margin_dpo/margin_mean': 0.2547217905521393, 'margin_dpo/margin_std': 0.4430729150772095, 'logps/chosen': -58.9935417175293, 'logps/rejected': -97.69529724121094, 'logps/ref_chosen': -59.05763626098633, 'logps/ref_rejected': -97.50466918945312, 'logits/chosen': -0.4743150472640991, 'logits/rejected': -0.4301157593727112, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.2547225058078766, 'margin_dpo/beta_margin_mean': 0.025472251698374748, 'margin_dpo/beta_margin_std': 0.044476091861724854, 'margin_dpo/beta_margin_grad_mean': -0.4936363697052002, 'margin_dpo/beta_margin_grad_std': 0.01110980473458767, 'epoch': 0.03} + 3%|██▌ | 22/681 [01:02<30:20, 2.76s/it] 3%|██▋ | 23/681 [01:05<31:37, 2.88s/it] {'loss': 1.364, 'grad_norm': 80.28763580322266, 'learning_rate': 1.5942028985507245e-07, 'margin_dpo/margin_mean': 0.22987452149391174, 'margin_dpo/margin_std': 0.4392421543598175, 'logps/chosen': -60.04255676269531, 'logps/rejected': -81.33428955078125, 'logps/ref_chosen': -60.07769775390625, 'logps/ref_rejected': -81.1395492553711, 'logits/chosen': -0.4873223900794983, 'logits/rejected': -0.4646031856536865, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.22987452149391174, 'margin_dpo/beta_margin_mean': 0.022987453266978264, 'margin_dpo/beta_margin_std': 0.04579947143793106, 'margin_dpo/beta_margin_grad_mean': -0.49425840377807617, 'margin_dpo/beta_margin_grad_std': 0.011434967629611492, 'epoch': 0.03} + 3%|██▋ | 23/681 [01:05<31:37, 2.88s/it] 4%|██▊ | 24/681 [01:08<30:58, 2.83s/it] {'loss': 1.3629, 'grad_norm': 80.72453308105469, 'learning_rate': 1.6666666666666665e-07, 'margin_dpo/margin_mean': 0.24034002423286438, 'margin_dpo/margin_std': 0.42840874195098877, 'logps/chosen': -44.27165985107422, 'logps/rejected': -99.34617614746094, 'logps/ref_chosen': -44.29103469848633, 'logps/ref_rejected': -99.12521362304688, 'logits/chosen': -0.479617714881897, 'logits/rejected': -0.46357664465904236, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.2403390109539032, 'margin_dpo/beta_margin_mean': 0.02403390221297741, 'margin_dpo/beta_margin_std': 0.04411429166793823, 'margin_dpo/beta_margin_grad_mean': -0.49399420619010925, 'margin_dpo/beta_margin_grad_std': 0.01102045550942421, 'epoch': 0.04} + 4%|██▊ | 24/681 [01:08<30:58, 2.83s/it] 4%|██▉ | 25/681 [01:11<30:33, 2.79s/it] {'loss': 1.3645, 'grad_norm': 73.97421264648438, 'learning_rate': 1.7391304347826085e-07, 'margin_dpo/margin_mean': 0.22478067874908447, 'margin_dpo/margin_std': 0.4543741047382355, 'logps/chosen': -52.51414489746094, 'logps/rejected': -89.54405975341797, 'logps/ref_chosen': -52.537052154541016, 'logps/ref_rejected': -89.34219360351562, 'logits/chosen': -0.49460622668266296, 'logits/rejected': -0.4645787179470062, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.22478055953979492, 'margin_dpo/beta_margin_mean': 0.02247805707156658, 'margin_dpo/beta_margin_std': 0.045543402433395386, 'margin_dpo/beta_margin_grad_mean': -0.4943839907646179, 'margin_dpo/beta_margin_grad_std': 0.011376350186765194, 'epoch': 0.04} + 4%|██▉ | 25/681 [01:11<30:33, 2.79s/it] 4%|███ | 26/681 [01:13<28:58, 2.65s/it] {'loss': 1.3457, 'grad_norm': 87.36368560791016, 'learning_rate': 1.8115942028985507e-07, 'margin_dpo/margin_mean': 0.41762077808380127, 'margin_dpo/margin_std': 0.5226191282272339, 'logps/chosen': -53.813804626464844, 'logps/rejected': -103.66832733154297, 'logps/ref_chosen': -53.92280578613281, 'logps/ref_rejected': -103.35971069335938, 'logits/chosen': -0.5448323488235474, 'logits/rejected': -0.5133931636810303, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.4176199734210968, 'margin_dpo/beta_margin_mean': 0.04176199808716774, 'margin_dpo/beta_margin_std': 0.05279136076569557, 'margin_dpo/beta_margin_grad_mean': -0.48957008123397827, 'margin_dpo/beta_margin_grad_std': 0.013178465887904167, 'epoch': 0.04} + 4%|███ | 26/681 [01:13<28:58, 2.65s/it] 4%|███▏ | 27/681 [01:16<28:27, 2.61s/it] {'loss': 1.3374, 'grad_norm': 94.08861541748047, 'learning_rate': 1.8840579710144927e-07, 'margin_dpo/margin_mean': 0.5043210983276367, 'margin_dpo/margin_std': 0.5811291933059692, 'logps/chosen': -42.766082763671875, 'logps/rejected': -99.09607696533203, 'logps/ref_chosen': -42.898529052734375, 'logps/ref_rejected': -98.72420501708984, 'logits/chosen': -0.5202087163925171, 'logits/rejected': -0.4837333858013153, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.504321813583374, 'margin_dpo/beta_margin_mean': 0.05043218284845352, 'margin_dpo/beta_margin_std': 0.05854206159710884, 'margin_dpo/beta_margin_grad_mean': -0.4874098598957062, 'margin_dpo/beta_margin_grad_std': 0.014595179818570614, 'epoch': 0.04} + 4%|███▏ | 27/681 [01:16<28:27, 2.61s/it] 4%|███▏ | 28/681 [01:18<28:24, 2.61s/it] {'loss': 1.3547, 'grad_norm': 75.05455780029297, 'learning_rate': 1.9565217391304347e-07, 'margin_dpo/margin_mean': 0.3272559344768524, 'margin_dpo/margin_std': 0.5973866581916809, 'logps/chosen': -60.553565979003906, 'logps/rejected': -91.7254409790039, 'logps/ref_chosen': -60.55650329589844, 'logps/ref_rejected': -91.40111541748047, 'logits/chosen': -0.5194311141967773, 'logits/rejected': -0.46526244282722473, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.3272556662559509, 'margin_dpo/beta_margin_mean': 0.03272556886076927, 'margin_dpo/beta_margin_std': 0.06033402308821678, 'margin_dpo/beta_margin_grad_mean': -0.4918249249458313, 'margin_dpo/beta_margin_grad_std': 0.015058773569762707, 'epoch': 0.04} + 4%|███▏ | 28/681 [01:18<28:24, 2.61s/it] 4%|███▎ | 29/681 [01:21<27:28, 2.53s/it] {'loss': 1.3289, 'grad_norm': 90.46174621582031, 'learning_rate': 2.028985507246377e-07, 'margin_dpo/margin_mean': 0.5928229689598083, 'margin_dpo/margin_std': 0.6189556121826172, 'logps/chosen': -57.68913269042969, 'logps/rejected': -97.86851501464844, 'logps/ref_chosen': -57.80778503417969, 'logps/ref_rejected': -97.39434814453125, 'logits/chosen': -0.5414900779724121, 'logits/rejected': -0.49426716566085815, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.5928221344947815, 'margin_dpo/beta_margin_mean': 0.05928221344947815, 'margin_dpo/beta_margin_std': 0.062019772827625275, 'margin_dpo/beta_margin_grad_mean': -0.4852002263069153, 'margin_dpo/beta_margin_grad_std': 0.015466224402189255, 'epoch': 0.04} + 4%|███▎ | 29/681 [01:21<27:28, 2.53s/it] 4%|███▍ | 30/681 [01:23<28:01, 2.58s/it] {'loss': 1.3197, 'grad_norm': 87.33443450927734, 'learning_rate': 2.1014492753623187e-07, 'margin_dpo/margin_mean': 0.6878979206085205, 'margin_dpo/margin_std': 0.62163245677948, 'logps/chosen': -52.40911102294922, 'logps/rejected': -99.00884246826172, 'logps/ref_chosen': -52.57737350463867, 'logps/ref_rejected': -98.48921203613281, 'logits/chosen': -0.4894167184829712, 'logits/rejected': -0.45850175619125366, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.6878980398178101, 'margin_dpo/beta_margin_mean': 0.06878980994224548, 'margin_dpo/beta_margin_std': 0.06341779977083206, 'margin_dpo/beta_margin_grad_mean': -0.48282885551452637, 'margin_dpo/beta_margin_grad_std': 0.01581035926938057, 'epoch': 0.04} + 4%|███▍ | 30/681 [01:23<28:01, 2.58s/it] 5%|███▌ | 31/681 [01:26<28:28, 2.63s/it] {'loss': 1.3429, 'grad_norm': 67.94820404052734, 'learning_rate': 2.1739130434782607e-07, 'margin_dpo/margin_mean': 0.4500678479671478, 'margin_dpo/margin_std': 0.6665528416633606, 'logps/chosen': -63.70445251464844, 'logps/rejected': -73.24160766601562, 'logps/ref_chosen': -63.806922912597656, 'logps/ref_rejected': -72.89400482177734, 'logits/chosen': -0.5108931064605713, 'logits/rejected': -0.4666990637779236, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.450067937374115, 'margin_dpo/beta_margin_mean': 0.04500679671764374, 'margin_dpo/beta_margin_std': 0.06682661920785904, 'margin_dpo/beta_margin_grad_mean': -0.48876938223838806, 'margin_dpo/beta_margin_grad_std': 0.0166544821113348, 'epoch': 0.05} + 5%|███▌ | 31/681 [01:26<28:28, 2.63s/it] 5%|███▋ | 32/681 [01:29<29:03, 2.69s/it] {'loss': 1.3154, 'grad_norm': 82.90047454833984, 'learning_rate': 2.2463768115942027e-07, 'margin_dpo/margin_mean': 0.7446720600128174, 'margin_dpo/margin_std': 0.9450139999389648, 'logps/chosen': -62.53711700439453, 'logps/rejected': -89.8597640991211, 'logps/ref_chosen': -62.739524841308594, 'logps/ref_rejected': -89.3175048828125, 'logits/chosen': -0.49858012795448303, 'logits/rejected': -0.45628952980041504, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.7446719408035278, 'margin_dpo/beta_margin_mean': 0.07446718961000443, 'margin_dpo/beta_margin_std': 0.09461291879415512, 'margin_dpo/beta_margin_grad_mean': -0.48145589232444763, 'margin_dpo/beta_margin_grad_std': 0.023477083072066307, 'epoch': 0.05} + 5%|███▋ | 32/681 [01:29<29:03, 2.69s/it] 5%|███▊ | 33/681 [01:32<29:14, 2.71s/it] {'loss': 1.3243, 'grad_norm': 72.11341857910156, 'learning_rate': 2.318840579710145e-07, 'margin_dpo/margin_mean': 0.6417955160140991, 'margin_dpo/margin_std': 0.6490182876586914, 'logps/chosen': -53.105873107910156, 'logps/rejected': -88.37184143066406, 'logps/ref_chosen': -53.26097106933594, 'logps/ref_rejected': -87.8851318359375, 'logits/chosen': -0.47633564472198486, 'logits/rejected': -0.4497436285018921, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.6417950391769409, 'margin_dpo/beta_margin_mean': 0.06417950242757797, 'margin_dpo/beta_margin_std': 0.06679090112447739, 'margin_dpo/beta_margin_grad_mean': -0.48398149013519287, 'margin_dpo/beta_margin_grad_std': 0.016650153324007988, 'epoch': 0.05} + 5%|███▊ | 33/681 [01:32<29:14, 2.71s/it] 5%|███▉ | 34/681 [01:34<29:25, 2.73s/it] {'loss': 1.3068, 'grad_norm': 77.38883209228516, 'learning_rate': 2.391304347826087e-07, 'margin_dpo/margin_mean': 0.8307995796203613, 'margin_dpo/margin_std': 0.8540636301040649, 'logps/chosen': -50.72978210449219, 'logps/rejected': -102.66510009765625, 'logps/ref_chosen': -50.81732940673828, 'logps/ref_rejected': -101.92184448242188, 'logits/chosen': -0.5127777457237244, 'logits/rejected': -0.49532148241996765, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 0.8307997584342957, 'margin_dpo/beta_margin_mean': 0.0830799788236618, 'margin_dpo/beta_margin_std': 0.08640186488628387, 'margin_dpo/beta_margin_grad_mean': -0.4792894721031189, 'margin_dpo/beta_margin_grad_std': 0.021500185132026672, 'epoch': 0.05} + 5%|███▉ | 34/681 [01:34<29:25, 2.73s/it] 5%|████ | 35/681 [01:37<29:31, 2.74s/it] {'loss': 1.2708, 'grad_norm': 82.41116333007812, 'learning_rate': 2.463768115942029e-07, 'margin_dpo/margin_mean': 1.2235569953918457, 'margin_dpo/margin_std': 1.111976146697998, 'logps/chosen': -50.88545227050781, 'logps/rejected': -107.90895080566406, 'logps/ref_chosen': -51.02449035644531, 'logps/ref_rejected': -106.82443237304688, 'logits/chosen': -0.5374979972839355, 'logits/rejected': -0.5004309415817261, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.2235571146011353, 'margin_dpo/beta_margin_mean': 0.12235570698976517, 'margin_dpo/beta_margin_std': 0.11256185173988342, 'margin_dpo/beta_margin_grad_mean': -0.4696078896522522, 'margin_dpo/beta_margin_grad_std': 0.02748698741197586, 'epoch': 0.05} + 5%|████ | 35/681 [01:37<29:31, 2.74s/it] 5%|████▏ | 36/681 [01:40<29:10, 2.71s/it] {'loss': 1.2813, 'grad_norm': 72.79762268066406, 'learning_rate': 2.536231884057971e-07, 'margin_dpo/margin_mean': 1.122597098350525, 'margin_dpo/margin_std': 1.2439404726028442, 'logps/chosen': -51.94648742675781, 'logps/rejected': -87.11822509765625, 'logps/ref_chosen': -51.991493225097656, 'logps/ref_rejected': -86.04061889648438, 'logits/chosen': -0.5538948774337769, 'logits/rejected': -0.517404317855835, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.1225981712341309, 'margin_dpo/beta_margin_mean': 0.11225982010364532, 'margin_dpo/beta_margin_std': 0.12831299006938934, 'margin_dpo/beta_margin_grad_mean': -0.47209563851356506, 'margin_dpo/beta_margin_grad_std': 0.03178109601140022, 'epoch': 0.05} + 5%|████▏ | 36/681 [01:40<29:10, 2.71s/it] 5%|████▎ | 37/681 [01:42<28:56, 2.70s/it] {'loss': 1.2911, 'grad_norm': 61.13553237915039, 'learning_rate': 2.6086956521739126e-07, 'margin_dpo/margin_mean': 1.0293034315109253, 'margin_dpo/margin_std': 1.3807631731033325, 'logps/chosen': -62.78415298461914, 'logps/rejected': -78.90142059326172, 'logps/ref_chosen': -62.807106018066406, 'logps/ref_rejected': -77.89507293701172, 'logits/chosen': -0.5280976295471191, 'logits/rejected': -0.4858455955982208, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.0293034315109253, 'margin_dpo/beta_margin_mean': 0.10293034464120865, 'margin_dpo/beta_margin_std': 0.14328184723854065, 'margin_dpo/beta_margin_grad_mean': -0.47450384497642517, 'margin_dpo/beta_margin_grad_std': 0.03514566645026207, 'epoch': 0.05} + 5%|████▎ | 37/681 [01:42<28:56, 2.70s/it] 6%|████▍ | 38/681 [01:45<27:34, 2.57s/it] {'loss': 1.262, 'grad_norm': 70.00904083251953, 'learning_rate': 2.681159420289855e-07, 'margin_dpo/margin_mean': 1.3506265878677368, 'margin_dpo/margin_std': 1.575331449508667, 'logps/chosen': -48.24530792236328, 'logps/rejected': -99.11785888671875, 'logps/ref_chosen': -48.39051818847656, 'logps/ref_rejected': -97.91244506835938, 'logits/chosen': -0.5190426111221313, 'logits/rejected': -0.4862367510795593, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.3506262302398682, 'margin_dpo/beta_margin_mean': 0.13506263494491577, 'margin_dpo/beta_margin_std': 0.15932665765285492, 'margin_dpo/beta_margin_grad_mean': -0.4666314721107483, 'margin_dpo/beta_margin_grad_std': 0.03878392279148102, 'epoch': 0.06} + 6%|████▍ | 38/681 [01:45<27:34, 2.57s/it] 6%|████▌ | 39/681 [01:47<27:19, 2.55s/it] {'loss': 1.2298, 'grad_norm': 74.47781372070312, 'learning_rate': 2.753623188405797e-07, 'margin_dpo/margin_mean': 1.6912682056427002, 'margin_dpo/margin_std': 1.4713746309280396, 'logps/chosen': -50.65707015991211, 'logps/rejected': -80.16737365722656, 'logps/ref_chosen': -50.75046920776367, 'logps/ref_rejected': -78.56951141357422, 'logits/chosen': -0.5537021160125732, 'logits/rejected': -0.5135682821273804, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.6912682056427002, 'margin_dpo/beta_margin_mean': 0.16912682354450226, 'margin_dpo/beta_margin_std': 0.14913904666900635, 'margin_dpo/beta_margin_grad_mean': -0.45806559920310974, 'margin_dpo/beta_margin_grad_std': 0.036758922040462494, 'epoch': 0.06} + 6%|████▌ | 39/681 [01:47<27:19, 2.55s/it] 6%|████▋ | 40/681 [01:50<27:58, 2.62s/it] {'loss': 1.243, 'grad_norm': 59.9489631652832, 'learning_rate': 2.8260869565217386e-07, 'margin_dpo/margin_mean': 1.5692870616912842, 'margin_dpo/margin_std': 1.697884202003479, 'logps/chosen': -57.77392578125, 'logps/rejected': -75.65821075439453, 'logps/ref_chosen': -57.985069274902344, 'logps/ref_rejected': -74.30007934570312, 'logits/chosen': -0.5245569348335266, 'logits/rejected': -0.4949991703033447, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.5692862272262573, 'margin_dpo/beta_margin_mean': 0.1569286286830902, 'margin_dpo/beta_margin_std': 0.1742551028728485, 'margin_dpo/beta_margin_grad_mean': -0.46128079295158386, 'margin_dpo/beta_margin_grad_std': 0.04237818345427513, 'epoch': 0.06} + 6%|████▋ | 40/681 [01:50<27:58, 2.62s/it] 6%|████▊ | 41/681 [01:53<27:52, 2.61s/it] {'loss': 1.2195, 'grad_norm': 67.88613891601562, 'learning_rate': 2.898550724637681e-07, 'margin_dpo/margin_mean': 1.867814540863037, 'margin_dpo/margin_std': 2.0870983600616455, 'logps/chosen': -62.67747497558594, 'logps/rejected': -98.87300109863281, 'logps/ref_chosen': -62.69581604003906, 'logps/ref_rejected': -97.02352905273438, 'logits/chosen': -0.5592871308326721, 'logits/rejected': -0.5240367650985718, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 1.8678141832351685, 'margin_dpo/beta_margin_mean': 0.18678142130374908, 'margin_dpo/beta_margin_std': 0.21468721330165863, 'margin_dpo/beta_margin_grad_mean': -0.4541543424129486, 'margin_dpo/beta_margin_grad_std': 0.05179302766919136, 'epoch': 0.06} + 6%|████▊ | 41/681 [01:53<27:52, 2.61s/it] 6%|████▊ | 42/681 [01:56<29:12, 2.74s/it] {'loss': 1.1578, 'grad_norm': 78.81612396240234, 'learning_rate': 2.971014492753623e-07, 'margin_dpo/margin_mean': 2.601499319076538, 'margin_dpo/margin_std': 2.445554733276367, 'logps/chosen': -58.707366943359375, 'logps/rejected': -112.25081634521484, 'logps/ref_chosen': -58.96642303466797, 'logps/ref_rejected': -109.90837097167969, 'logits/chosen': -0.5433309674263, 'logits/rejected': -0.49680295586586, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.601499557495117, 'margin_dpo/beta_margin_mean': 0.2601499557495117, 'margin_dpo/beta_margin_std': 0.24821382761001587, 'margin_dpo/beta_margin_grad_mean': -0.4366336166858673, 'margin_dpo/beta_margin_grad_std': 0.058427974581718445, 'epoch': 0.06} + 6%|████▊ | 42/681 [01:56<29:12, 2.74s/it] 6%|████▉ | 43/681 [01:58<28:59, 2.73s/it] {'loss': 1.1675, 'grad_norm': 72.23222351074219, 'learning_rate': 3.043478260869565e-07, 'margin_dpo/margin_mean': 2.4315857887268066, 'margin_dpo/margin_std': 1.964142918586731, 'logps/chosen': -53.65935516357422, 'logps/rejected': -98.41513061523438, 'logps/ref_chosen': -54.15599822998047, 'logps/ref_rejected': -96.48019409179688, 'logits/chosen': -0.5568352341651917, 'logits/rejected': -0.532639741897583, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.4315857887268066, 'margin_dpo/beta_margin_mean': 0.24315857887268066, 'margin_dpo/beta_margin_std': 0.19878432154655457, 'margin_dpo/beta_margin_grad_mean': -0.44025009870529175, 'margin_dpo/beta_margin_grad_std': 0.04758695140480995, 'epoch': 0.06} + 6%|████▉ | 43/681 [01:58<28:59, 2.73s/it] 6%|█████ | 44/681 [02:01<29:58, 2.82s/it] {'loss': 1.1338, 'grad_norm': 78.49581909179688, 'learning_rate': 3.115942028985507e-07, 'margin_dpo/margin_mean': 2.852534532546997, 'margin_dpo/margin_std': 2.270460605621338, 'logps/chosen': -49.86518859863281, 'logps/rejected': -111.42298889160156, 'logps/ref_chosen': -50.07849884033203, 'logps/ref_rejected': -108.78376007080078, 'logits/chosen': -0.458575576543808, 'logits/rejected': -0.43896228075027466, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.8525354862213135, 'margin_dpo/beta_margin_mean': 0.2852535545825958, 'margin_dpo/beta_margin_std': 0.2277490794658661, 'margin_dpo/beta_margin_grad_mean': -0.43024110794067383, 'margin_dpo/beta_margin_grad_std': 0.0542747788131237, 'epoch': 0.06} + 6%|█████ | 44/681 [02:01<29:58, 2.82s/it] 7%|█████▏ | 45/681 [02:04<29:28, 2.78s/it] {'loss': 1.1805, 'grad_norm': 62.053192138671875, 'learning_rate': 3.188405797101449e-07, 'margin_dpo/margin_mean': 2.3724491596221924, 'margin_dpo/margin_std': 2.6500847339630127, 'logps/chosen': -48.24645233154297, 'logps/rejected': -80.1404037475586, 'logps/ref_chosen': -48.41493225097656, 'logps/ref_rejected': -77.93643188476562, 'logits/chosen': -0.4600446820259094, 'logits/rejected': -0.4469829797744751, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.3724491596221924, 'margin_dpo/beta_margin_mean': 0.23724493384361267, 'margin_dpo/beta_margin_std': 0.2693977653980255, 'margin_dpo/beta_margin_grad_mean': -0.4424096643924713, 'margin_dpo/beta_margin_grad_std': 0.06356598436832428, 'epoch': 0.07} + 7%|█████▏ | 45/681 [02:04<29:28, 2.78s/it] 7%|█████▎ | 46/681 [02:07<29:52, 2.82s/it] {'loss': 1.1354, 'grad_norm': 69.27433013916016, 'learning_rate': 3.260869565217391e-07, 'margin_dpo/margin_mean': 2.9789419174194336, 'margin_dpo/margin_std': 3.244965076446533, 'logps/chosen': -55.80693435668945, 'logps/rejected': -98.43904113769531, 'logps/ref_chosen': -55.999427795410156, 'logps/ref_rejected': -95.652587890625, 'logits/chosen': -0.5094949007034302, 'logits/rejected': -0.45755523443222046, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.9789421558380127, 'margin_dpo/beta_margin_mean': 0.2978942394256592, 'margin_dpo/beta_margin_std': 0.3255438506603241, 'margin_dpo/beta_margin_grad_mean': -0.42856818437576294, 'margin_dpo/beta_margin_grad_std': 0.07470017671585083, 'epoch': 0.07} + 7%|█████▎ | 46/681 [02:07<29:52, 2.82s/it] 7%|█████▍ | 47/681 [02:10<29:17, 2.77s/it] {'loss': 1.1271, 'grad_norm': 65.2599868774414, 'learning_rate': 3.333333333333333e-07, 'margin_dpo/margin_mean': 2.989128351211548, 'margin_dpo/margin_std': 2.6342062950134277, 'logps/chosen': -57.496604919433594, 'logps/rejected': -97.23886108398438, 'logps/ref_chosen': -57.92607879638672, 'logps/ref_rejected': -94.67920684814453, 'logits/chosen': -0.5813416242599487, 'logits/rejected': -0.5291002988815308, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 2.9891281127929688, 'margin_dpo/beta_margin_mean': 0.29891282320022583, 'margin_dpo/beta_margin_std': 0.26972696185112, 'margin_dpo/beta_margin_grad_mean': -0.42738690972328186, 'margin_dpo/beta_margin_grad_std': 0.0637550950050354, 'epoch': 0.07} + 7%|█████▍ | 47/681 [02:10<29:17, 2.77s/it] 7%|█████▌ | 48/681 [02:12<29:22, 2.79s/it] {'loss': 1.1227, 'grad_norm': 73.67699432373047, 'learning_rate': 3.4057971014492755e-07, 'margin_dpo/margin_mean': 3.1348774433135986, 'margin_dpo/margin_std': 3.0109379291534424, 'logps/chosen': -57.117156982421875, 'logps/rejected': -91.08055877685547, 'logps/ref_chosen': -57.188072204589844, 'logps/ref_rejected': -88.0166015625, 'logits/chosen': -0.5998705625534058, 'logits/rejected': -0.5423353910446167, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 3.1348772048950195, 'margin_dpo/beta_margin_mean': 0.3134877383708954, 'margin_dpo/beta_margin_std': 0.32677435874938965, 'margin_dpo/beta_margin_grad_mean': -0.4244069755077362, 'margin_dpo/beta_margin_grad_std': 0.07627448439598083, 'epoch': 0.07} + 7%|█████▌ | 48/681 [02:12<29:22, 2.79s/it] 7%|█████▋ | 49/681 [02:15<28:50, 2.74s/it] {'loss': 1.0774, 'grad_norm': 61.355953216552734, 'learning_rate': 3.478260869565217e-07, 'margin_dpo/margin_mean': 3.8097658157348633, 'margin_dpo/margin_std': 3.869323253631592, 'logps/chosen': -61.36932373046875, 'logps/rejected': -87.26129913330078, 'logps/ref_chosen': -61.685264587402344, 'logps/ref_rejected': -83.76747131347656, 'logits/chosen': -0.5448025465011597, 'logits/rejected': -0.4857603907585144, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 3.8097660541534424, 'margin_dpo/beta_margin_mean': 0.3809766173362732, 'margin_dpo/beta_margin_std': 0.3965732753276825, 'margin_dpo/beta_margin_grad_mean': -0.41020649671554565, 'margin_dpo/beta_margin_grad_std': 0.08793335407972336, 'epoch': 0.07} + 7%|█████▋ | 49/681 [02:15<28:50, 2.74s/it] 7%|█████▊ | 50/681 [02:18<28:38, 2.72s/it] {'loss': 1.0518, 'grad_norm': 62.80997085571289, 'learning_rate': 3.5507246376811595e-07, 'margin_dpo/margin_mean': 4.163365364074707, 'margin_dpo/margin_std': 4.094795227050781, 'logps/chosen': -58.89775848388672, 'logps/rejected': -100.69513702392578, 'logps/ref_chosen': -58.72413635253906, 'logps/ref_rejected': -96.35814666748047, 'logits/chosen': -0.5425491333007812, 'logits/rejected': -0.5065620541572571, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 4.163364887237549, 'margin_dpo/beta_margin_mean': 0.4163365066051483, 'margin_dpo/beta_margin_std': 0.4100196361541748, 'margin_dpo/beta_margin_grad_mean': -0.40193936228752136, 'margin_dpo/beta_margin_grad_std': 0.09261800348758698, 'epoch': 0.07} + 7%|█████▊ | 50/681 [02:18<28:38, 2.72s/it] 7%|█████▉ | 51/681 [02:21<29:04, 2.77s/it] {'loss': 1.085, 'grad_norm': 52.91781234741211, 'learning_rate': 3.6231884057971015e-07, 'margin_dpo/margin_mean': 4.017845153808594, 'margin_dpo/margin_std': 5.1221513748168945, 'logps/chosen': -61.69359588623047, 'logps/rejected': -80.33977508544922, 'logps/ref_chosen': -61.3736686706543, 'logps/ref_rejected': -76.00199890136719, 'logits/chosen': -0.5184497833251953, 'logits/rejected': -0.4852331280708313, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 4.017845153808594, 'margin_dpo/beta_margin_mean': 0.4017845094203949, 'margin_dpo/beta_margin_std': 0.5204705595970154, 'margin_dpo/beta_margin_grad_mean': -0.40868327021598816, 'margin_dpo/beta_margin_grad_std': 0.1110108494758606, 'epoch': 0.07} + 7%|█████▉ | 51/681 [02:21<29:04, 2.77s/it] 8%|██████ | 52/681 [02:24<29:32, 2.82s/it] {'loss': 0.9189, 'grad_norm': 58.923404693603516, 'learning_rate': 3.695652173913043e-07, 'margin_dpo/margin_mean': 6.196599006652832, 'margin_dpo/margin_std': 5.190753936767578, 'logps/chosen': -51.979454040527344, 'logps/rejected': -85.81260681152344, 'logps/ref_chosen': -52.33735656738281, 'logps/ref_rejected': -79.97391510009766, 'logits/chosen': -0.5524120330810547, 'logits/rejected': -0.496574342250824, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.196599006652832, 'margin_dpo/beta_margin_mean': 0.6196599006652832, 'margin_dpo/beta_margin_std': 0.5214123129844666, 'margin_dpo/beta_margin_grad_mean': -0.3595433533191681, 'margin_dpo/beta_margin_grad_std': 0.10714302211999893, 'epoch': 0.08} + 8%|██████ | 52/681 [02:24<29:32, 2.82s/it] 8%|██████▏ | 53/681 [02:26<28:48, 2.75s/it] {'loss': 0.9446, 'grad_norm': 58.20880889892578, 'learning_rate': 3.7681159420289855e-07, 'margin_dpo/margin_mean': 6.325778484344482, 'margin_dpo/margin_std': 6.248142242431641, 'logps/chosen': -53.506500244140625, 'logps/rejected': -98.30122375488281, 'logps/ref_chosen': -53.31465530395508, 'logps/ref_rejected': -91.7835922241211, 'logits/chosen': -0.6073682904243469, 'logits/rejected': -0.5856744050979614, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.325778961181641, 'margin_dpo/beta_margin_mean': 0.6325778961181641, 'margin_dpo/beta_margin_std': 0.6903671622276306, 'margin_dpo/beta_margin_grad_mean': -0.36480841040611267, 'margin_dpo/beta_margin_grad_std': 0.12540318071842194, 'epoch': 0.08} + 8%|██████▏ | 53/681 [02:26<28:48, 2.75s/it] 8%|██████▎ | 54/681 [02:29<27:39, 2.65s/it] {'loss': 0.9783, 'grad_norm': 59.29412841796875, 'learning_rate': 3.8405797101449274e-07, 'margin_dpo/margin_mean': 5.348155498504639, 'margin_dpo/margin_std': 5.086174488067627, 'logps/chosen': -51.13933563232422, 'logps/rejected': -97.51422119140625, 'logps/ref_chosen': -50.68865966796875, 'logps/ref_rejected': -91.71539306640625, 'logits/chosen': -0.633226752281189, 'logits/rejected': -0.5815136432647705, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 5.348156452178955, 'margin_dpo/beta_margin_mean': 0.5348156690597534, 'margin_dpo/beta_margin_std': 0.5101956725120544, 'margin_dpo/beta_margin_grad_mean': -0.3783862590789795, 'margin_dpo/beta_margin_grad_std': 0.10563214868307114, 'epoch': 0.08} + 8%|██████▎ | 54/681 [02:29<27:39, 2.65s/it] 8%|██████▍ | 55/681 [02:31<26:17, 2.52s/it] {'loss': 0.9548, 'grad_norm': 53.738956451416016, 'learning_rate': 3.9130434782608694e-07, 'margin_dpo/margin_mean': 6.541542053222656, 'margin_dpo/margin_std': 7.533283233642578, 'logps/chosen': -63.57060241699219, 'logps/rejected': -96.49041748046875, 'logps/ref_chosen': -62.615234375, 'logps/ref_rejected': -88.99349975585938, 'logits/chosen': -0.6361401081085205, 'logits/rejected': -0.5729630589485168, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.541542053222656, 'margin_dpo/beta_margin_mean': 0.6541542410850525, 'margin_dpo/beta_margin_std': 0.7597689032554626, 'margin_dpo/beta_margin_grad_mean': -0.361075222492218, 'margin_dpo/beta_margin_grad_std': 0.14729972183704376, 'epoch': 0.08} + 8%|██████▍ | 55/681 [02:31<26:17, 2.52s/it] 8%|██████▍ | 56/681 [02:34<27:07, 2.60s/it] {'loss': 0.9775, 'grad_norm': 48.09397506713867, 'learning_rate': 3.9855072463768114e-07, 'margin_dpo/margin_mean': 6.195199012756348, 'margin_dpo/margin_std': 7.399816989898682, 'logps/chosen': -58.66962432861328, 'logps/rejected': -101.10653686523438, 'logps/ref_chosen': -57.93273162841797, 'logps/ref_rejected': -94.1744384765625, 'logits/chosen': -0.5945051908493042, 'logits/rejected': -0.5514425039291382, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.195199966430664, 'margin_dpo/beta_margin_mean': 0.6195200085639954, 'margin_dpo/beta_margin_std': 0.7477858066558838, 'margin_dpo/beta_margin_grad_mean': -0.36780592799186707, 'margin_dpo/beta_margin_grad_std': 0.14850637316703796, 'epoch': 0.08} + 8%|██████▍ | 56/681 [02:34<27:07, 2.60s/it] 8%|██████▌ | 57/681 [02:36<26:48, 2.58s/it] {'loss': 0.9078, 'grad_norm': 54.234169006347656, 'learning_rate': 4.057971014492754e-07, 'margin_dpo/margin_mean': 6.902284145355225, 'margin_dpo/margin_std': 6.639451026916504, 'logps/chosen': -71.26276397705078, 'logps/rejected': -103.23522186279297, 'logps/ref_chosen': -70.49528503417969, 'logps/ref_rejected': -95.56546020507812, 'logits/chosen': -0.5641357898712158, 'logits/rejected': -0.5353480577468872, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.902284145355225, 'margin_dpo/beta_margin_mean': 0.6902284026145935, 'margin_dpo/beta_margin_std': 0.6726579070091248, 'margin_dpo/beta_margin_grad_mean': -0.34864187240600586, 'margin_dpo/beta_margin_grad_std': 0.13589681684970856, 'epoch': 0.08} + 8%|██████▌ | 57/681 [02:36<26:48, 2.58s/it] 9%|██████▋ | 58/681 [02:39<26:59, 2.60s/it] {'loss': 0.8977, 'grad_norm': 59.243927001953125, 'learning_rate': 4.1304347826086954e-07, 'margin_dpo/margin_mean': 7.606607437133789, 'margin_dpo/margin_std': 8.09335708618164, 'logps/chosen': -63.23316955566406, 'logps/rejected': -93.32413482666016, 'logps/ref_chosen': -62.13294219970703, 'logps/ref_rejected': -84.61729431152344, 'logits/chosen': -0.5894064903259277, 'logits/rejected': -0.5127171874046326, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 7.606607437133789, 'margin_dpo/beta_margin_mean': 0.7606607675552368, 'margin_dpo/beta_margin_std': 0.8165130615234375, 'margin_dpo/beta_margin_grad_mean': -0.3427189290523529, 'margin_dpo/beta_margin_grad_std': 0.15285082161426544, 'epoch': 0.09} + 9%|██████▋ | 58/681 [02:39<26:59, 2.60s/it] 9%|██████▊ | 59/681 [02:41<26:56, 2.60s/it] {'loss': 0.8575, 'grad_norm': 55.42934799194336, 'learning_rate': 4.2028985507246374e-07, 'margin_dpo/margin_mean': 8.485508918762207, 'margin_dpo/margin_std': 8.604471206665039, 'logps/chosen': -53.42650604248047, 'logps/rejected': -98.86468505859375, 'logps/ref_chosen': -51.932525634765625, 'logps/ref_rejected': -88.88520050048828, 'logits/chosen': -0.6423487663269043, 'logits/rejected': -0.6032625436782837, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 8.485508918762207, 'margin_dpo/beta_margin_mean': 0.8485509157180786, 'margin_dpo/beta_margin_std': 0.8816735148429871, 'margin_dpo/beta_margin_grad_mean': -0.32869505882263184, 'margin_dpo/beta_margin_grad_std': 0.15742561221122742, 'epoch': 0.09} + 9%|██████▊ | 59/681 [02:41<26:56, 2.60s/it] 9%|██████▉ | 60/681 [02:44<26:43, 2.58s/it] {'loss': 0.9555, 'grad_norm': 64.29039764404297, 'learning_rate': 4.2753623188405794e-07, 'margin_dpo/margin_mean': 6.686439514160156, 'margin_dpo/margin_std': 7.678452968597412, 'logps/chosen': -63.62670135498047, 'logps/rejected': -94.76435089111328, 'logps/ref_chosen': -60.94218444824219, 'logps/ref_rejected': -85.39340209960938, 'logits/chosen': -0.6296500563621521, 'logits/rejected': -0.5711052417755127, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.686439037322998, 'margin_dpo/beta_margin_mean': 0.6686438918113708, 'margin_dpo/beta_margin_std': 0.7756204009056091, 'margin_dpo/beta_margin_grad_mean': -0.3545895218849182, 'margin_dpo/beta_margin_grad_std': 0.15808549523353577, 'epoch': 0.09} + 9%|██████▉ | 60/681 [02:44<26:43, 2.58s/it] 9%|███████ | 61/681 [02:47<26:48, 2.59s/it] {'loss': 0.9341, 'grad_norm': 54.964107513427734, 'learning_rate': 4.3478260869565214e-07, 'margin_dpo/margin_mean': 8.251806259155273, 'margin_dpo/margin_std': 11.240764617919922, 'logps/chosen': -62.14350128173828, 'logps/rejected': -99.61428833007812, 'logps/ref_chosen': -60.633522033691406, 'logps/ref_rejected': -89.85249328613281, 'logits/chosen': -0.6372621655464172, 'logits/rejected': -0.6041065454483032, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 8.251806259155273, 'margin_dpo/beta_margin_mean': 0.8251805901527405, 'margin_dpo/beta_margin_std': 1.1422574520111084, 'margin_dpo/beta_margin_grad_mean': -0.34661665558815, 'margin_dpo/beta_margin_grad_std': 0.1781352162361145, 'epoch': 0.09} + 9%|███████ | 61/681 [02:47<26:48, 2.59s/it] 9%|███████▏ | 62/681 [02:50<28:06, 2.72s/it] {'loss': 0.9993, 'grad_norm': 58.057708740234375, 'learning_rate': 4.420289855072464e-07, 'margin_dpo/margin_mean': 6.19963264465332, 'margin_dpo/margin_std': 8.127958297729492, 'logps/chosen': -57.778465270996094, 'logps/rejected': -83.39352416992188, 'logps/ref_chosen': -56.15077209472656, 'logps/ref_rejected': -75.56619262695312, 'logits/chosen': -0.6090478897094727, 'logits/rejected': -0.5749986171722412, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 6.19963264465332, 'margin_dpo/beta_margin_mean': 0.6199632883071899, 'margin_dpo/beta_margin_std': 0.8312649130821228, 'margin_dpo/beta_margin_grad_mean': -0.37108179926872253, 'margin_dpo/beta_margin_grad_std': 0.15791070461273193, 'epoch': 0.09} + 9%|███████▏ | 62/681 [02:50<28:06, 2.72s/it] 9%|███████▎ | 63/681 [02:52<28:01, 2.72s/it] {'loss': 0.8773, 'grad_norm': 56.769561767578125, 'learning_rate': 4.4927536231884053e-07, 'margin_dpo/margin_mean': 8.366212844848633, 'margin_dpo/margin_std': 8.857807159423828, 'logps/chosen': -75.79495239257812, 'logps/rejected': -108.62382507324219, 'logps/ref_chosen': -73.14739227294922, 'logps/ref_rejected': -97.61006164550781, 'logits/chosen': -0.5860311388969421, 'logits/rejected': -0.5402973890304565, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 8.366211891174316, 'margin_dpo/beta_margin_mean': 0.8366211652755737, 'margin_dpo/beta_margin_std': 0.9040850400924683, 'margin_dpo/beta_margin_grad_mean': -0.3284067213535309, 'margin_dpo/beta_margin_grad_std': 0.16741114854812622, 'epoch': 0.09} + 9%|███████▎ | 63/681 [02:52<28:01, 2.72s/it] 9%|███████▍ | 64/681 [02:55<28:08, 2.74s/it] {'loss': 0.8493, 'grad_norm': 52.091590881347656, 'learning_rate': 4.5652173913043473e-07, 'margin_dpo/margin_mean': 9.82172966003418, 'margin_dpo/margin_std': 11.043643951416016, 'logps/chosen': -55.00431823730469, 'logps/rejected': -104.35765075683594, 'logps/ref_chosen': -53.99859619140625, 'logps/ref_rejected': -93.53020477294922, 'logits/chosen': -0.5791685581207275, 'logits/rejected': -0.5466402769088745, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 9.821728706359863, 'margin_dpo/beta_margin_mean': 0.9821729063987732, 'margin_dpo/beta_margin_std': 1.1361504793167114, 'margin_dpo/beta_margin_grad_mean': -0.3144356906414032, 'margin_dpo/beta_margin_grad_std': 0.1805381327867508, 'epoch': 0.09} + 9%|███████▍ | 64/681 [02:55<28:08, 2.74s/it] 10%|███████▌ | 65/681 [02:58<28:29, 2.78s/it] {'loss': 0.8585, 'grad_norm': 54.09811782836914, 'learning_rate': 4.63768115942029e-07, 'margin_dpo/margin_mean': 9.843679428100586, 'margin_dpo/margin_std': 10.951974868774414, 'logps/chosen': -68.0100326538086, 'logps/rejected': -122.96417236328125, 'logps/ref_chosen': -64.83599853515625, 'logps/ref_rejected': -109.94645690917969, 'logits/chosen': -0.6608457565307617, 'logits/rejected': -0.6478947401046753, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 9.843679428100586, 'margin_dpo/beta_margin_mean': 0.9843679666519165, 'margin_dpo/beta_margin_std': 1.1074903011322021, 'margin_dpo/beta_margin_grad_mean': -0.31038472056388855, 'margin_dpo/beta_margin_grad_std': 0.18928615748882294, 'epoch': 0.1} + 10%|███████▌ | 65/681 [02:58<28:29, 2.78s/it] 10%|███████▋ | 66/681 [03:01<28:07, 2.74s/it] {'loss': 0.8859, 'grad_norm': 52.60911560058594, 'learning_rate': 4.7101449275362313e-07, 'margin_dpo/margin_mean': 8.99482536315918, 'margin_dpo/margin_std': 10.87942123413086, 'logps/chosen': -54.36174011230469, 'logps/rejected': -87.54934692382812, 'logps/ref_chosen': -51.44352722167969, 'logps/ref_rejected': -75.63629150390625, 'logits/chosen': -0.6474887132644653, 'logits/rejected': -0.6150294542312622, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 8.99482536315918, 'margin_dpo/beta_margin_mean': 0.8994826078414917, 'margin_dpo/beta_margin_std': 1.1073994636535645, 'margin_dpo/beta_margin_grad_mean': -0.3307109475135803, 'margin_dpo/beta_margin_grad_std': 0.1775081753730774, 'epoch': 0.1} + 10%|███████▋ | 66/681 [03:01<28:07, 2.74s/it] 10%|███████▊ | 67/681 [03:03<26:38, 2.60s/it] {'loss': 0.8693, 'grad_norm': 52.46964645385742, 'learning_rate': 4.782608695652174e-07, 'margin_dpo/margin_mean': 9.277162551879883, 'margin_dpo/margin_std': 10.92019271850586, 'logps/chosen': -61.81807327270508, 'logps/rejected': -84.54171752929688, 'logps/ref_chosen': -59.34080505371094, 'logps/ref_rejected': -72.78729248046875, 'logits/chosen': -0.5966418981552124, 'logits/rejected': -0.5537301301956177, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 9.2771635055542, 'margin_dpo/beta_margin_mean': 0.9277163147926331, 'margin_dpo/beta_margin_std': 1.1012858152389526, 'margin_dpo/beta_margin_grad_mean': -0.32543134689331055, 'margin_dpo/beta_margin_grad_std': 0.17773009836673737, 'epoch': 0.1} + 10%|███████▊ | 67/681 [03:03<26:38, 2.60s/it] 10%|███████▉ | 68/681 [03:05<26:27, 2.59s/it] {'loss': 0.8459, 'grad_norm': 52.40779113769531, 'learning_rate': 4.855072463768116e-07, 'margin_dpo/margin_mean': 8.720624923706055, 'margin_dpo/margin_std': 8.963220596313477, 'logps/chosen': -67.98988342285156, 'logps/rejected': -88.71192932128906, 'logps/ref_chosen': -65.2058334350586, 'logps/ref_rejected': -77.20724487304688, 'logits/chosen': -0.6349166631698608, 'logits/rejected': -0.5751150250434875, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 8.720624923706055, 'margin_dpo/beta_margin_mean': 0.8720625042915344, 'margin_dpo/beta_margin_std': 0.9045540690422058, 'margin_dpo/beta_margin_grad_mean': -0.3253341615200043, 'margin_dpo/beta_margin_grad_std': 0.15630275011062622, 'epoch': 0.1} + 10%|███████▉ | 68/681 [03:05<26:27, 2.59s/it] 10%|████████ | 69/681 [03:08<27:08, 2.66s/it] {'loss': 0.7777, 'grad_norm': 53.23897933959961, 'learning_rate': 4.927536231884058e-07, 'margin_dpo/margin_mean': 10.385248184204102, 'margin_dpo/margin_std': 10.297136306762695, 'logps/chosen': -62.99334716796875, 'logps/rejected': -116.94822692871094, 'logps/ref_chosen': -59.81924057006836, 'logps/ref_rejected': -103.38886260986328, 'logits/chosen': -0.6085792183876038, 'logits/rejected': -0.5847188234329224, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 10.385248184204102, 'margin_dpo/beta_margin_mean': 1.0385247468948364, 'margin_dpo/beta_margin_std': 1.040853500366211, 'margin_dpo/beta_margin_grad_mean': -0.30277135968208313, 'margin_dpo/beta_margin_grad_std': 0.16077764332294464, 'epoch': 0.1} + 10%|████████ | 69/681 [03:08<27:08, 2.66s/it] 10%|████████ | 70/681 [03:11<26:28, 2.60s/it] {'loss': 0.7928, 'grad_norm': 59.40316390991211, 'learning_rate': 5e-07, 'margin_dpo/margin_mean': 11.182600975036621, 'margin_dpo/margin_std': 11.917827606201172, 'logps/chosen': -66.4103012084961, 'logps/rejected': -106.7230453491211, 'logps/ref_chosen': -61.930641174316406, 'logps/ref_rejected': -91.060791015625, 'logits/chosen': -0.625554621219635, 'logits/rejected': -0.5908818244934082, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 11.182600975036621, 'margin_dpo/beta_margin_mean': 1.118260145187378, 'margin_dpo/beta_margin_std': 1.199088215827942, 'margin_dpo/beta_margin_grad_mean': -0.30000537633895874, 'margin_dpo/beta_margin_grad_std': 0.18498124182224274, 'epoch': 0.1} + 10%|████████ | 70/681 [03:11<26:28, 2.60s/it] 10%|████████▏ | 71/681 [03:13<26:21, 2.59s/it] {'loss': 0.702, 'grad_norm': 49.68572998046875, 'learning_rate': 4.999967061337492e-07, 'margin_dpo/margin_mean': 12.864418029785156, 'margin_dpo/margin_std': 12.424565315246582, 'logps/chosen': -65.69276428222656, 'logps/rejected': -114.14346313476562, 'logps/ref_chosen': -61.750343322753906, 'logps/ref_rejected': -97.33662414550781, 'logits/chosen': -0.6752599477767944, 'logits/rejected': -0.6361984014511108, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 12.86441707611084, 'margin_dpo/beta_margin_mean': 1.2864418029785156, 'margin_dpo/beta_margin_std': 1.286440372467041, 'margin_dpo/beta_margin_grad_mean': -0.27304375171661377, 'margin_dpo/beta_margin_grad_std': 0.16720205545425415, 'epoch': 0.1} + 10%|████████▏ | 71/681 [03:13<26:21, 2.59s/it] 11%|████████▎ | 72/681 [03:16<26:40, 2.63s/it] {'loss': 0.7297, 'grad_norm': 59.574241638183594, 'learning_rate': 4.999868246217933e-07, 'margin_dpo/margin_mean': 13.383831024169922, 'margin_dpo/margin_std': 13.636287689208984, 'logps/chosen': -70.28240966796875, 'logps/rejected': -112.89981079101562, 'logps/ref_chosen': -66.05341339111328, 'logps/ref_rejected': -95.2869873046875, 'logits/chosen': -0.6442112922668457, 'logits/rejected': -0.6080772280693054, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 13.383831024169922, 'margin_dpo/beta_margin_mean': 1.3383830785751343, 'margin_dpo/beta_margin_std': 1.3651797771453857, 'margin_dpo/beta_margin_grad_mean': -0.2696942389011383, 'margin_dpo/beta_margin_grad_std': 0.19517795741558075, 'epoch': 0.11} + 11%|████████▎ | 72/681 [03:16<26:40, 2.63s/it] 11%|████████▍ | 73/681 [03:19<27:07, 2.68s/it] {'loss': 0.9513, 'grad_norm': 76.11861419677734, 'learning_rate': 4.999703557245192e-07, 'margin_dpo/margin_mean': 13.051246643066406, 'margin_dpo/margin_std': 18.630680084228516, 'logps/chosen': -72.03385162353516, 'logps/rejected': -109.28495788574219, 'logps/ref_chosen': -66.25627136230469, 'logps/ref_rejected': -90.45613861083984, 'logits/chosen': -0.6918191909790039, 'logits/rejected': -0.6510320901870728, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 13.051246643066406, 'margin_dpo/beta_margin_mean': 1.3051246404647827, 'margin_dpo/beta_margin_std': 1.8701282739639282, 'margin_dpo/beta_margin_grad_mean': -0.31014135479927063, 'margin_dpo/beta_margin_grad_std': 0.24917910993099213, 'epoch': 0.11} + 11%|████████▍ | 73/681 [03:19<27:07, 2.68s/it] 11%|████████▌ | 74/681 [03:21<26:36, 2.63s/it] {'loss': 0.8775, 'grad_norm': 71.0533676147461, 'learning_rate': 4.999472998758977e-07, 'margin_dpo/margin_mean': 13.770397186279297, 'margin_dpo/margin_std': 20.299190521240234, 'logps/chosen': -59.54771423339844, 'logps/rejected': -115.84016418457031, 'logps/ref_chosen': -53.42488098144531, 'logps/ref_rejected': -95.94693756103516, 'logits/chosen': -0.6222573518753052, 'logits/rejected': -0.6104036569595337, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 13.770398139953613, 'margin_dpo/beta_margin_mean': 1.3770397901535034, 'margin_dpo/beta_margin_std': 2.0495500564575195, 'margin_dpo/beta_margin_grad_mean': -0.28980112075805664, 'margin_dpo/beta_margin_grad_std': 0.22041486203670502, 'epoch': 0.11} + 11%|████████▌ | 74/681 [03:21<26:36, 2.63s/it] 11%|████████▋ | 75/681 [03:24<26:50, 2.66s/it] {'loss': 0.6084, 'grad_norm': 50.546207427978516, 'learning_rate': 4.999176576834721e-07, 'margin_dpo/margin_mean': 19.06302833557129, 'margin_dpo/margin_std': 18.35777473449707, 'logps/chosen': -57.421756744384766, 'logps/rejected': -135.87710571289062, 'logps/ref_chosen': -51.861663818359375, 'logps/ref_rejected': -111.25397491455078, 'logits/chosen': -0.6528257131576538, 'logits/rejected': -0.6429094672203064, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 19.063030242919922, 'margin_dpo/beta_margin_mean': 1.90630304813385, 'margin_dpo/beta_margin_std': 1.8703465461730957, 'margin_dpo/beta_margin_grad_mean': -0.22819584608078003, 'margin_dpo/beta_margin_grad_std': 0.2010163813829422, 'epoch': 0.11} + 11%|████████▋ | 75/681 [03:24<26:50, 2.66s/it] 11%|████████▊ | 76/681 [03:27<26:26, 2.62s/it] {'loss': 0.8122, 'grad_norm': 63.239871978759766, 'learning_rate': 4.998814299283415e-07, 'margin_dpo/margin_mean': 12.300118446350098, 'margin_dpo/margin_std': 14.157339096069336, 'logps/chosen': -59.91857147216797, 'logps/rejected': -97.16926574707031, 'logps/ref_chosen': -53.26604080200195, 'logps/ref_rejected': -78.21662139892578, 'logits/chosen': -0.7003756165504456, 'logits/rejected': -0.6578394770622253, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 12.300118446350098, 'margin_dpo/beta_margin_mean': 1.2300118207931519, 'margin_dpo/beta_margin_std': 1.421257495880127, 'margin_dpo/beta_margin_grad_mean': -0.28062668442726135, 'margin_dpo/beta_margin_grad_std': 0.20105010271072388, 'epoch': 0.11} + 11%|████████▊ | 76/681 [03:27<26:26, 2.62s/it] 11%|████████▉ | 77/681 [03:29<25:32, 2.54s/it] {'loss': 0.6829, 'grad_norm': 78.45389556884766, 'learning_rate': 4.998386175651409e-07, 'margin_dpo/margin_mean': 19.283706665039062, 'margin_dpo/margin_std': 19.11894989013672, 'logps/chosen': -63.619422912597656, 'logps/rejected': -118.58006286621094, 'logps/ref_chosen': -58.0966796875, 'logps/ref_rejected': -93.77361297607422, 'logits/chosen': -0.6659625768661499, 'logits/rejected': -0.6236972212791443, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 19.283708572387695, 'margin_dpo/beta_margin_mean': 1.9283708333969116, 'margin_dpo/beta_margin_std': 1.9269988536834717, 'margin_dpo/beta_margin_grad_mean': -0.2217966765165329, 'margin_dpo/beta_margin_grad_std': 0.22179701924324036, 'epoch': 0.11} + 11%|████████▉ | 77/681 [03:29<25:32, 2.54s/it] 11%|█████████ | 78/681 [03:32<25:56, 2.58s/it] {'loss': 0.7296, 'grad_norm': 66.56047058105469, 'learning_rate': 4.997892217220159e-07, 'margin_dpo/margin_mean': 14.69200325012207, 'margin_dpo/margin_std': 15.322187423706055, 'logps/chosen': -60.89007568359375, 'logps/rejected': -104.90266418457031, 'logps/ref_chosen': -55.61378479003906, 'logps/ref_rejected': -84.93436431884766, 'logits/chosen': -0.6366250514984131, 'logits/rejected': -0.6083469986915588, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.692002296447754, 'margin_dpo/beta_margin_mean': 1.4692002534866333, 'margin_dpo/beta_margin_std': 1.5563766956329346, 'margin_dpo/beta_margin_grad_mean': -0.2656141221523285, 'margin_dpo/beta_margin_grad_std': 0.21040529012680054, 'epoch': 0.11} + 11%|█████████ | 78/681 [03:32<25:56, 2.58s/it] 12%|█████████▏ | 79/681 [03:34<26:22, 2.63s/it] {'loss': 0.7766, 'grad_norm': 59.296844482421875, 'learning_rate': 4.997332437005931e-07, 'margin_dpo/margin_mean': 16.086679458618164, 'margin_dpo/margin_std': 18.848827362060547, 'logps/chosen': -60.498695373535156, 'logps/rejected': -108.78245544433594, 'logps/ref_chosen': -55.45048522949219, 'logps/ref_rejected': -87.64756774902344, 'logits/chosen': -0.6760110855102539, 'logits/rejected': -0.6464430093765259, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 16.086679458618164, 'margin_dpo/beta_margin_mean': 1.6086679697036743, 'margin_dpo/beta_margin_std': 1.9045932292938232, 'margin_dpo/beta_margin_grad_mean': -0.2785935699939728, 'margin_dpo/beta_margin_grad_std': 0.22844330966472626, 'epoch': 0.12} + 12%|█████████▏ | 79/681 [03:34<26:22, 2.63s/it] 12%|█████████▎ | 80/681 [03:37<26:14, 2.62s/it] {'loss': 0.8355, 'grad_norm': 63.66164016723633, 'learning_rate': 4.996706849759452e-07, 'margin_dpo/margin_mean': 14.238598823547363, 'margin_dpo/margin_std': 17.483436584472656, 'logps/chosen': -65.51264190673828, 'logps/rejected': -108.77944946289062, 'logps/ref_chosen': -58.519290924072266, 'logps/ref_rejected': -87.54750061035156, 'logits/chosen': -0.7178832292556763, 'logits/rejected': -0.6710443496704102, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.238598823547363, 'margin_dpo/beta_margin_mean': 1.42385995388031, 'margin_dpo/beta_margin_std': 1.8568590879440308, 'margin_dpo/beta_margin_grad_mean': -0.293730765581131, 'margin_dpo/beta_margin_grad_std': 0.22851316630840302, 'epoch': 0.12} + 12%|█████████▎ | 80/681 [03:37<26:14, 2.62s/it] 12%|█████████▍ | 81/681 [03:40<27:24, 2.74s/it] {'loss': 0.6904, 'grad_norm': 72.20431518554688, 'learning_rate': 4.996015471965529e-07, 'margin_dpo/margin_mean': 18.687902450561523, 'margin_dpo/margin_std': 20.542957305908203, 'logps/chosen': -72.02912902832031, 'logps/rejected': -153.9308624267578, 'logps/ref_chosen': -66.44886779785156, 'logps/ref_rejected': -129.66270446777344, 'logits/chosen': -0.7084971070289612, 'logits/rejected': -0.6748213171958923, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 18.687902450561523, 'margin_dpo/beta_margin_mean': 1.8687902688980103, 'margin_dpo/beta_margin_std': 2.0691728591918945, 'margin_dpo/beta_margin_grad_mean': -0.24315199255943298, 'margin_dpo/beta_margin_grad_std': 0.22722284495830536, 'epoch': 0.12} + 12%|█████████▍ | 81/681 [03:40<27:24, 2.74s/it] 12%|█████████▌ | 82/681 [03:42<26:35, 2.66s/it] {'loss': 0.9632, 'grad_norm': 87.32213592529297, 'learning_rate': 4.995258321842611e-07, 'margin_dpo/margin_mean': 15.053365707397461, 'margin_dpo/margin_std': 21.363815307617188, 'logps/chosen': -59.366302490234375, 'logps/rejected': -112.9305419921875, 'logps/ref_chosen': -52.232383728027344, 'logps/ref_rejected': -90.74325561523438, 'logits/chosen': -0.6286877393722534, 'logits/rejected': -0.6112765073776245, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.053364753723145, 'margin_dpo/beta_margin_mean': 1.5053365230560303, 'margin_dpo/beta_margin_std': 2.1997368335723877, 'margin_dpo/beta_margin_grad_mean': -0.2833250164985657, 'margin_dpo/beta_margin_grad_std': 0.2522350549697876, 'epoch': 0.12} + 12%|█████████▌ | 82/681 [03:42<26:35, 2.66s/it] 12%|█████████▋ | 83/681 [03:45<25:49, 2.59s/it] {'loss': 0.7422, 'grad_norm': 67.5205307006836, 'learning_rate': 4.994435419342304e-07, 'margin_dpo/margin_mean': 16.844711303710938, 'margin_dpo/margin_std': 18.56102752685547, 'logps/chosen': -62.771873474121094, 'logps/rejected': -127.50509643554688, 'logps/ref_chosen': -55.82738494873047, 'logps/ref_rejected': -103.71590423583984, 'logits/chosen': -0.6808498501777649, 'logits/rejected': -0.6353092193603516, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 16.844711303710938, 'margin_dpo/beta_margin_mean': 1.6844712495803833, 'margin_dpo/beta_margin_std': 1.8570791482925415, 'margin_dpo/beta_margin_grad_mean': -0.2590833604335785, 'margin_dpo/beta_margin_grad_std': 0.22852419316768646, 'epoch': 0.12} + 12%|█████████▋ | 83/681 [03:45<25:49, 2.59s/it] 12%|█████████▋ | 84/681 [03:48<26:19, 2.65s/it] {'loss': 0.6737, 'grad_norm': 58.463897705078125, 'learning_rate': 4.993546786148857e-07, 'margin_dpo/margin_mean': 15.18610954284668, 'margin_dpo/margin_std': 13.861265182495117, 'logps/chosen': -72.32835388183594, 'logps/rejected': -107.63688659667969, 'logps/ref_chosen': -67.1761703491211, 'logps/ref_rejected': -87.29859924316406, 'logits/chosen': -0.6490943431854248, 'logits/rejected': -0.6113982200622559, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.186108589172363, 'margin_dpo/beta_margin_mean': 1.5186108350753784, 'margin_dpo/beta_margin_std': 1.4150245189666748, 'margin_dpo/beta_margin_grad_mean': -0.244083434343338, 'margin_dpo/beta_margin_grad_std': 0.19765815138816833, 'epoch': 0.12} + 12%|█████████▋ | 84/681 [03:48<26:19, 2.65s/it] 12%|█████████▊ | 85/681 [03:51<27:18, 2.75s/it] {'loss': 0.7715, 'grad_norm': 64.56900787353516, 'learning_rate': 4.992592445678582e-07, 'margin_dpo/margin_mean': 14.56472110748291, 'margin_dpo/margin_std': 15.904397010803223, 'logps/chosen': -64.1954345703125, 'logps/rejected': -98.99234008789062, 'logps/ref_chosen': -58.406620025634766, 'logps/ref_rejected': -78.63880157470703, 'logits/chosen': -0.6331249475479126, 'logits/rejected': -0.6021745204925537, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.564720153808594, 'margin_dpo/beta_margin_mean': 1.4564720392227173, 'margin_dpo/beta_margin_std': 1.6396623849868774, 'margin_dpo/beta_margin_grad_mean': -0.27991896867752075, 'margin_dpo/beta_margin_grad_std': 0.2167506366968155, 'epoch': 0.12} + 12%|█████████▊ | 85/681 [03:51<27:18, 2.75s/it] 13%|█████████▉ | 86/681 [03:53<27:42, 2.79s/it] {'loss': 0.9186, 'grad_norm': 117.56181335449219, 'learning_rate': 4.991572423079235e-07, 'margin_dpo/margin_mean': 15.120819091796875, 'margin_dpo/margin_std': 21.751773834228516, 'logps/chosen': -63.10496520996094, 'logps/rejected': -110.20996856689453, 'logps/ref_chosen': -56.13746643066406, 'logps/ref_rejected': -88.12165069580078, 'logits/chosen': -0.6592001914978027, 'logits/rejected': -0.6417681574821472, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.120819091796875, 'margin_dpo/beta_margin_mean': 1.5120818614959717, 'margin_dpo/beta_margin_std': 2.213315725326538, 'margin_dpo/beta_margin_grad_mean': -0.2995266318321228, 'margin_dpo/beta_margin_grad_std': 0.24782723188400269, 'epoch': 0.13} + 13%|█████████▉ | 86/681 [03:54<27:42, 2.79s/it] 13%|██████████ | 87/681 [03:56<27:28, 2.78s/it] {'loss': 0.7859, 'grad_norm': 66.58505249023438, 'learning_rate': 4.990486745229364e-07, 'margin_dpo/margin_mean': 16.435949325561523, 'margin_dpo/margin_std': 18.915019989013672, 'logps/chosen': -62.457305908203125, 'logps/rejected': -118.72473907470703, 'logps/ref_chosen': -55.63609313964844, 'logps/ref_rejected': -95.46757507324219, 'logits/chosen': -0.7072494626045227, 'logits/rejected': -0.670096755027771, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 16.435949325561523, 'margin_dpo/beta_margin_mean': 1.6435949802398682, 'margin_dpo/beta_margin_std': 1.9270051717758179, 'margin_dpo/beta_margin_grad_mean': -0.2566095292568207, 'margin_dpo/beta_margin_grad_std': 0.23276211321353912, 'epoch': 0.13} + 13%|██████████ | 87/681 [03:56<27:28, 2.78s/it] 13%|██████████▏ | 88/681 [03:59<28:07, 2.85s/it] {'loss': 0.9197, 'grad_norm': 75.93292999267578, 'learning_rate': 4.989335440737586e-07, 'margin_dpo/margin_mean': 12.606681823730469, 'margin_dpo/margin_std': 15.93301773071289, 'logps/chosen': -82.13240051269531, 'logps/rejected': -127.77642822265625, 'logps/ref_chosen': -73.67115020751953, 'logps/ref_rejected': -106.70849609375, 'logits/chosen': -0.6478073596954346, 'logits/rejected': -0.6310935020446777, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 12.606681823730469, 'margin_dpo/beta_margin_mean': 1.260668158531189, 'margin_dpo/beta_margin_std': 1.7176685333251953, 'margin_dpo/beta_margin_grad_mean': -0.3014436364173889, 'margin_dpo/beta_margin_grad_std': 0.23827531933784485, 'epoch': 0.13} + 13%|██████████▏ | 88/681 [03:59<28:07, 2.85s/it] 13%|██████████▎ | 89/681 [04:02<27:02, 2.74s/it] {'loss': 0.7412, 'grad_norm': 56.17230224609375, 'learning_rate': 4.988118539941847e-07, 'margin_dpo/margin_mean': 12.958423614501953, 'margin_dpo/margin_std': 13.854536056518555, 'logps/chosen': -65.11277770996094, 'logps/rejected': -99.52984619140625, 'logps/ref_chosen': -60.624916076660156, 'logps/ref_rejected': -82.08354949951172, 'logits/chosen': -0.6928755640983582, 'logits/rejected': -0.6521140336990356, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 12.958422660827637, 'margin_dpo/beta_margin_mean': 1.2958422899246216, 'margin_dpo/beta_margin_std': 1.4058305025100708, 'margin_dpo/beta_margin_grad_mean': -0.27584946155548096, 'margin_dpo/beta_margin_grad_std': 0.18205879628658295, 'epoch': 0.13} + 13%|██████████▎ | 89/681 [04:02<27:02, 2.74s/it] 13%|██████████▍ | 90/681 [04:04<26:07, 2.65s/it] {'loss': 0.8411, 'grad_norm': 66.36186981201172, 'learning_rate': 4.986836074908615e-07, 'margin_dpo/margin_mean': 15.813644409179688, 'margin_dpo/margin_std': 20.459163665771484, 'logps/chosen': -59.482887268066406, 'logps/rejected': -133.55593872070312, 'logps/ref_chosen': -53.285308837890625, 'logps/ref_rejected': -111.54470825195312, 'logits/chosen': -0.6513394713401794, 'logits/rejected': -0.6424415111541748, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.813644409179688, 'margin_dpo/beta_margin_mean': 1.5813645124435425, 'margin_dpo/beta_margin_std': 2.088043451309204, 'margin_dpo/beta_margin_grad_mean': -0.2849555015563965, 'margin_dpo/beta_margin_grad_std': 0.22966991364955902, 'epoch': 0.13} + 13%|██████████▍ | 90/681 [04:04<26:07, 2.65s/it] 13%|██████████▌ | 91/681 [04:07<25:59, 2.64s/it] {'loss': 0.762, 'grad_norm': 65.70479583740234, 'learning_rate': 4.985488079432037e-07, 'margin_dpo/margin_mean': 15.881075859069824, 'margin_dpo/margin_std': 17.554851531982422, 'logps/chosen': -67.02444458007812, 'logps/rejected': -108.97652435302734, 'logps/ref_chosen': -61.80295944213867, 'logps/ref_rejected': -87.87395477294922, 'logits/chosen': -0.695541262626648, 'logits/rejected': -0.6568491458892822, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.88107681274414, 'margin_dpo/beta_margin_mean': 1.588107705116272, 'margin_dpo/beta_margin_std': 1.7729498147964478, 'margin_dpo/beta_margin_grad_mean': -0.27236229181289673, 'margin_dpo/beta_margin_grad_std': 0.23041805624961853, 'epoch': 0.13} + 13%|██████████▌ | 91/681 [04:07<25:59, 2.64s/it] 14%|██████████▋ | 92/681 [04:09<26:05, 2.66s/it] {'loss': 0.8107, 'grad_norm': 60.354248046875, 'learning_rate': 4.984074589033043e-07, 'margin_dpo/margin_mean': 14.722909927368164, 'margin_dpo/margin_std': 17.423236846923828, 'logps/chosen': -56.71138000488281, 'logps/rejected': -97.6747055053711, 'logps/ref_chosen': -51.640769958496094, 'logps/ref_rejected': -77.88117980957031, 'logits/chosen': -0.7051235437393188, 'logits/rejected': -0.6763289570808411, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.722909927368164, 'margin_dpo/beta_margin_mean': 1.4722909927368164, 'margin_dpo/beta_margin_std': 1.7639739513397217, 'margin_dpo/beta_margin_grad_mean': -0.2847803235054016, 'margin_dpo/beta_margin_grad_std': 0.230974480509758, 'epoch': 0.14} + 14%|██████████▋ | 92/681 [04:10<26:05, 2.66s/it] 14%|██████████▊ | 93/681 [04:12<24:32, 2.50s/it] {'loss': 0.6862, 'grad_norm': 48.63566589355469, 'learning_rate': 4.982595640958425e-07, 'margin_dpo/margin_mean': 14.930685043334961, 'margin_dpo/margin_std': 15.499519348144531, 'logps/chosen': -57.98681640625, 'logps/rejected': -97.54901123046875, 'logps/ref_chosen': -52.529239654541016, 'logps/ref_rejected': -77.1607437133789, 'logits/chosen': -0.7185451984405518, 'logits/rejected': -0.6557145714759827, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.930684089660645, 'margin_dpo/beta_margin_mean': 1.4930684566497803, 'margin_dpo/beta_margin_std': 1.5610140562057495, 'margin_dpo/beta_margin_grad_mean': -0.25878626108169556, 'margin_dpo/beta_margin_grad_std': 0.19036650657653809, 'epoch': 0.14} + 14%|██████████▊ | 93/681 [04:12<24:32, 2.50s/it] 14%|██████████▉ | 94/681 [04:15<25:34, 2.61s/it] {'loss': 0.6489, 'grad_norm': 51.54408264160156, 'learning_rate': 4.98105127417984e-07, 'margin_dpo/margin_mean': 15.727283477783203, 'margin_dpo/margin_std': 14.665702819824219, 'logps/chosen': -67.19898986816406, 'logps/rejected': -121.30268859863281, 'logps/ref_chosen': -61.22261047363281, 'logps/ref_rejected': -99.59902954101562, 'logits/chosen': -0.6754232048988342, 'logits/rejected': -0.6463443040847778, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 15.72728443145752, 'margin_dpo/beta_margin_mean': 1.5727283954620361, 'margin_dpo/beta_margin_std': 1.4842208623886108, 'margin_dpo/beta_margin_grad_mean': -0.2494257092475891, 'margin_dpo/beta_margin_grad_std': 0.1913631409406662, 'epoch': 0.14} + 14%|██████████▉ | 94/681 [04:15<25:34, 2.61s/it] 14%|███████████ | 95/681 [04:17<25:07, 2.57s/it] {'loss': 0.7168, 'grad_norm': 49.133419036865234, 'learning_rate': 4.979441529392784e-07, 'margin_dpo/margin_mean': 12.927990913391113, 'margin_dpo/margin_std': 12.453845977783203, 'logps/chosen': -57.13197326660156, 'logps/rejected': -93.41667175292969, 'logps/ref_chosen': -52.52364730834961, 'logps/ref_rejected': -75.88035583496094, 'logits/chosen': -0.7007203102111816, 'logits/rejected': -0.6629537343978882, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 12.927990913391113, 'margin_dpo/beta_margin_mean': 1.2927991151809692, 'margin_dpo/beta_margin_std': 1.250800371170044, 'margin_dpo/beta_margin_grad_mean': -0.26963499188423157, 'margin_dpo/beta_margin_grad_std': 0.18063600361347198, 'epoch': 0.14} + 14%|███████████ | 95/681 [04:17<25:07, 2.57s/it] 14%|███████████▏ | 96/681 [04:20<25:16, 2.59s/it] {'loss': 0.6242, 'grad_norm': 50.28048324584961, 'learning_rate': 4.977766449015534e-07, 'margin_dpo/margin_mean': 17.10177230834961, 'margin_dpo/margin_std': 16.97222328186035, 'logps/chosen': -65.92119598388672, 'logps/rejected': -117.46200561523438, 'logps/ref_chosen': -62.15697479248047, 'logps/ref_rejected': -96.59601593017578, 'logits/chosen': -0.6796199083328247, 'logits/rejected': -0.6378945708274841, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 17.101774215698242, 'margin_dpo/beta_margin_mean': 1.7101774215698242, 'margin_dpo/beta_margin_std': 1.7064077854156494, 'margin_dpo/beta_margin_grad_mean': -0.23479405045509338, 'margin_dpo/beta_margin_grad_std': 0.18774589896202087, 'epoch': 0.14} + 14%|███████████▏ | 96/681 [04:20<25:16, 2.59s/it] 14%|███████████▎ | 97/681 [04:22<25:48, 2.65s/it] {'loss': 0.6845, 'grad_norm': 53.316551208496094, 'learning_rate': 4.976026077188012e-07, 'margin_dpo/margin_mean': 13.75833511352539, 'margin_dpo/margin_std': 12.287176132202148, 'logps/chosen': -59.297088623046875, 'logps/rejected': -95.37380981445312, 'logps/ref_chosen': -54.64636993408203, 'logps/ref_rejected': -76.96475219726562, 'logits/chosen': -0.6628963947296143, 'logits/rejected': -0.6078641414642334, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 13.758334159851074, 'margin_dpo/beta_margin_mean': 1.375833511352539, 'margin_dpo/beta_margin_std': 1.2895034551620483, 'margin_dpo/beta_margin_grad_mean': -0.26004087924957275, 'margin_dpo/beta_margin_grad_std': 0.18338225781917572, 'epoch': 0.14} + 14%|███████████▎ | 97/681 [04:22<25:48, 2.65s/it] 14%|███████████▎ | 98/681 [04:25<25:06, 2.58s/it] {'loss': 0.748, 'grad_norm': 59.0107536315918, 'learning_rate': 4.974220459770639e-07, 'margin_dpo/margin_mean': 14.806069374084473, 'margin_dpo/margin_std': 15.232458114624023, 'logps/chosen': -71.05479431152344, 'logps/rejected': -117.12971496582031, 'logps/ref_chosen': -65.25862884521484, 'logps/ref_rejected': -96.5274887084961, 'logits/chosen': -0.6648178100585938, 'logits/rejected': -0.6405047178268433, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 14.806070327758789, 'margin_dpo/beta_margin_mean': 1.480607032775879, 'margin_dpo/beta_margin_std': 1.5301272869110107, 'margin_dpo/beta_margin_grad_mean': -0.25851550698280334, 'margin_dpo/beta_margin_grad_std': 0.21575090289115906, 'epoch': 0.14} + 14%|███████████▎ | 98/681 [04:25<25:06, 2.58s/it] 15%|███████████▍ | 99/681 [04:27<24:02, 2.48s/it] {'loss': 0.6371, 'grad_norm': 48.36380386352539, 'learning_rate': 4.972349644343108e-07, 'margin_dpo/margin_mean': 16.02899932861328, 'margin_dpo/margin_std': 16.368377685546875, 'logps/chosen': -50.50402069091797, 'logps/rejected': -107.33246612548828, 'logps/ref_chosen': -45.63848114013672, 'logps/ref_rejected': -86.43792724609375, 'logits/chosen': -0.6831210851669312, 'logits/rejected': -0.6707972884178162, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 16.02899932861328, 'margin_dpo/beta_margin_mean': 1.6028999090194702, 'margin_dpo/beta_margin_std': 1.6380233764648438, 'margin_dpo/beta_margin_grad_mean': -0.24989381432533264, 'margin_dpo/beta_margin_grad_std': 0.17566484212875366, 'epoch': 0.15} + 15%|███████████▍ | 99/681 [04:27<24:02, 2.48s/it] 15%|███████████▍ | 100/681 [04:30<25:19, 2.62s/it] {'loss': 0.9045, 'grad_norm': 66.3724365234375, 'learning_rate': 4.970413680203148e-07, 'margin_dpo/margin_mean': 11.581655502319336, 'margin_dpo/margin_std': 15.148920059204102, 'logps/chosen': -62.664703369140625, 'logps/rejected': -90.71258544921875, 'logps/ref_chosen': -57.5939826965332, 'logps/ref_rejected': -74.06021118164062, 'logits/chosen': -0.6820343732833862, 'logits/rejected': -0.6383761167526245, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 11.581656455993652, 'margin_dpo/beta_margin_mean': 1.158165693283081, 'margin_dpo/beta_margin_std': 1.5505050420761108, 'margin_dpo/beta_margin_grad_mean': -0.30843135714530945, 'margin_dpo/beta_margin_grad_std': 0.22273820638656616, 'epoch': 0.15} + 15%|███████████▍ | 100/681 [04:30<25:19, 2.62s/it][INFO|trainer.py:4307] 2026-04-17 21:31:00,913 >> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 21:31:00,913 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 21:31:00,913 >> Batch size = 8 + + 0%| | 0/73 [00:00> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 21:36:01,990 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 21:36:01,990 >> Batch size = 8 + + 0%| | 0/73 [00:00> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200 +[INFO|configuration_utils.py:419] 2026-04-17 21:36:59,407 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200/config.json +[INFO|configuration_utils.py:911] 2026-04-17 21:36:59,411 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-17 21:37:51,624 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-17 21:37:51,632 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-17 21:37:51,645 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200/special_tokens_map.json + 30%|█████████████████████▊ | 201/681 [15:06<13:39:01, 102.38s/it] {'loss': 0.5336, 'grad_norm': 52.3123893737793, 'learning_rate': 4.455721242469372e-07, 'margin_dpo/margin_mean': 25.56842803955078, 'margin_dpo/margin_std': 23.23642349243164, 'logps/chosen': -83.8150634765625, 'logps/rejected': -148.78948974609375, 'logps/ref_chosen': -75.4022216796875, 'logps/ref_rejected': -114.80821990966797, 'logits/chosen': -0.6279897689819336, 'logits/rejected': -0.5977976322174072, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.56842613220215, 'margin_dpo/beta_margin_mean': 2.556842803955078, 'margin_dpo/beta_margin_std': 2.3333568572998047, 'margin_dpo/beta_margin_grad_mean': -0.18962885439395905, 'margin_dpo/beta_margin_grad_std': 0.21612294018268585, 'epoch': 0.3} + 30%|█████████████████████▊ | 201/681 [15:06<13:39:01, 102.38s/it] 30%|██████████████████████▌ | 202/681 [15:09<9:38:42, 72.49s/it] {'loss': 0.7708, 'grad_norm': 71.65006256103516, 'learning_rate': 4.4477014363141755e-07, 'margin_dpo/margin_mean': 18.85692596435547, 'margin_dpo/margin_std': 21.052621841430664, 'logps/chosen': -60.95005798339844, 'logps/rejected': -116.69070434570312, 'logps/ref_chosen': -50.101318359375, 'logps/ref_rejected': -86.98503112792969, 'logits/chosen': -0.6751070618629456, 'logits/rejected': -0.663360059261322, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 18.85692596435547, 'margin_dpo/beta_margin_mean': 1.8856925964355469, 'margin_dpo/beta_margin_std': 2.1072704792022705, 'margin_dpo/beta_margin_grad_mean': -0.25191596150398254, 'margin_dpo/beta_margin_grad_std': 0.24605146050453186, 'epoch': 0.3} + 30%|██████████████████████▌ | 202/681 [15:09<9:38:42, 72.49s/it] 30%|██████████████████████▋ | 203/681 [15:12<6:51:18, 51.63s/it] {'loss': 0.532, 'grad_norm': 45.57592010498047, 'learning_rate': 4.439630306414758e-07, 'margin_dpo/margin_mean': 21.060504913330078, 'margin_dpo/margin_std': 18.373455047607422, 'logps/chosen': -68.63609313964844, 'logps/rejected': -114.98287963867188, 'logps/ref_chosen': -60.60969543457031, 'logps/ref_rejected': -85.89596557617188, 'logits/chosen': -0.6758503317832947, 'logits/rejected': -0.632592499256134, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 21.060503005981445, 'margin_dpo/beta_margin_mean': 2.106050491333008, 'margin_dpo/beta_margin_std': 1.8404932022094727, 'margin_dpo/beta_margin_grad_mean': -0.20186971127986908, 'margin_dpo/beta_margin_grad_std': 0.19567202031612396, 'epoch': 0.3} + 30%|██████████████████████▋ | 203/681 [15:12<6:51:18, 51.63s/it] 30%|██████████████████████▊ | 204/681 [15:15<4:54:09, 37.00s/it] {'loss': 0.5197, 'grad_norm': 46.98274230957031, 'learning_rate': 4.431508065452897e-07, 'margin_dpo/margin_mean': 21.9324893951416, 'margin_dpo/margin_std': 19.7504825592041, 'logps/chosen': -89.8402099609375, 'logps/rejected': -119.30364990234375, 'logps/ref_chosen': -80.16496276855469, 'logps/ref_rejected': -87.69590759277344, 'logits/chosen': -0.640312910079956, 'logits/rejected': -0.5806652307510376, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 21.9324893951416, 'margin_dpo/beta_margin_mean': 2.193248987197876, 'margin_dpo/beta_margin_std': 2.008610725402832, 'margin_dpo/beta_margin_grad_mean': -0.1984976977109909, 'margin_dpo/beta_margin_grad_std': 0.19227007031440735, 'epoch': 0.3} + 30%|██████████████████████▊ | 204/681 [15:15<4:54:09, 37.00s/it] 30%|██████████████████████▉ | 205/681 [15:17<3:31:53, 26.71s/it] {'loss': 0.6391, 'grad_norm': 67.07288360595703, 'learning_rate': 4.4233349274571974e-07, 'margin_dpo/margin_mean': 23.78384780883789, 'margin_dpo/margin_std': 23.02971649169922, 'logps/chosen': -70.58891296386719, 'logps/rejected': -120.11308288574219, 'logps/ref_chosen': -59.384735107421875, 'logps/ref_rejected': -85.12505340576172, 'logits/chosen': -0.690580427646637, 'logits/rejected': -0.6472284197807312, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.78384780883789, 'margin_dpo/beta_margin_mean': 2.378384828567505, 'margin_dpo/beta_margin_std': 2.325178384780884, 'margin_dpo/beta_margin_grad_mean': -0.21127313375473022, 'margin_dpo/beta_margin_grad_std': 0.2417152225971222, 'epoch': 0.3} + 30%|██████████████████████▉ | 205/681 [15:18<3:31:53, 26.71s/it] 30%|██████████████████████▉ | 206/681 [15:20<2:33:29, 19.39s/it] {'loss': 0.4331, 'grad_norm': 44.62752914428711, 'learning_rate': 4.415111107797445e-07, 'margin_dpo/margin_mean': 27.303932189941406, 'margin_dpo/margin_std': 21.387907028198242, 'logps/chosen': -57.75178527832031, 'logps/rejected': -137.04470825195312, 'logps/ref_chosen': -46.964500427246094, 'logps/ref_rejected': -98.9534912109375, 'logits/chosen': -0.6897552013397217, 'logits/rejected': -0.6878204345703125, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.303930282592773, 'margin_dpo/beta_margin_mean': 2.730393171310425, 'margin_dpo/beta_margin_std': 2.1477179527282715, 'margin_dpo/beta_margin_grad_mean': -0.16189786791801453, 'margin_dpo/beta_margin_grad_std': 0.19883880019187927, 'epoch': 0.3} + 30%|██████████████████████▉ | 206/681 [15:20<2:33:29, 19.39s/it] 30%|███████████████████████ | 207/681 [15:22<1:53:25, 14.36s/it] {'loss': 0.4613, 'grad_norm': 51.993370056152344, 'learning_rate': 4.4068368231789365e-07, 'margin_dpo/margin_mean': 28.46685028076172, 'margin_dpo/margin_std': 25.901378631591797, 'logps/chosen': -64.26461791992188, 'logps/rejected': -121.12300872802734, 'logps/ref_chosen': -56.05625915527344, 'logps/ref_rejected': -84.44779968261719, 'logits/chosen': -0.6620955467224121, 'logits/rejected': -0.6329070925712585, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.46685028076172, 'margin_dpo/beta_margin_mean': 2.8466851711273193, 'margin_dpo/beta_margin_std': 2.5996739864349365, 'margin_dpo/beta_margin_grad_mean': -0.1773681491613388, 'margin_dpo/beta_margin_grad_std': 0.19014772772789001, 'epoch': 0.3} + 30%|███████████████████████ | 207/681 [15:22<1:53:25, 14.36s/it] 31%|███████████████████████▏ | 208/681 [15:25<1:25:31, 10.85s/it] {'loss': 0.4839, 'grad_norm': 52.05033493041992, 'learning_rate': 4.398512291636768e-07, 'margin_dpo/margin_mean': 23.237594604492188, 'margin_dpo/margin_std': 20.450965881347656, 'logps/chosen': -79.44926452636719, 'logps/rejected': -129.90614318847656, 'logps/ref_chosen': -67.06761169433594, 'logps/ref_rejected': -94.28689575195312, 'logits/chosen': -0.6714012622833252, 'logits/rejected': -0.6417751312255859, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.237592697143555, 'margin_dpo/beta_margin_mean': 2.3237593173980713, 'margin_dpo/beta_margin_std': 2.0613441467285156, 'margin_dpo/beta_margin_grad_mean': -0.19027359783649445, 'margin_dpo/beta_margin_grad_std': 0.18024224042892456, 'epoch': 0.31} + 31%|███████████████████████▏ | 208/681 [15:25<1:25:31, 10.85s/it] 31%|███████████████████████▎ | 209/681 [15:27<1:05:24, 8.31s/it] {'loss': 0.6043, 'grad_norm': 49.21156692504883, 'learning_rate': 4.3901377325300857e-07, 'margin_dpo/margin_mean': 23.724063873291016, 'margin_dpo/margin_std': 21.8853816986084, 'logps/chosen': -65.82164764404297, 'logps/rejected': -114.3055419921875, 'logps/ref_chosen': -56.18169403076172, 'logps/ref_rejected': -80.94152069091797, 'logits/chosen': -0.6503257751464844, 'logits/rejected': -0.6238174438476562, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.724063873291016, 'margin_dpo/beta_margin_mean': 2.372406482696533, 'margin_dpo/beta_margin_std': 2.2294185161590576, 'margin_dpo/beta_margin_grad_mean': -0.20685909688472748, 'margin_dpo/beta_margin_grad_std': 0.23329907655715942, 'epoch': 0.31} + 31%|███████████████████████▎ | 209/681 [15:28<1:05:24, 8.31s/it] 31%|████████████████████████ | 210/681 [15:30<51:28, 6.56s/it] {'loss': 0.5055, 'grad_norm': 47.80137634277344, 'learning_rate': 4.381713366536311e-07, 'margin_dpo/margin_mean': 22.228591918945312, 'margin_dpo/margin_std': 18.896484375, 'logps/chosen': -56.31086349487305, 'logps/rejected': -108.84925842285156, 'logps/ref_chosen': -46.371822357177734, 'logps/ref_rejected': -76.68162536621094, 'logits/chosen': -0.6832214593887329, 'logits/rejected': -0.6534386873245239, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.228591918945312, 'margin_dpo/beta_margin_mean': 2.2228593826293945, 'margin_dpo/beta_margin_std': 1.9132236242294312, 'margin_dpo/beta_margin_grad_mean': -0.19675123691558838, 'margin_dpo/beta_margin_grad_std': 0.18586638569831848, 'epoch': 0.31} + 31%|████████████████████████ | 210/681 [15:30<51:28, 6.56s/it] 31%|████████████████████████▏ | 211/681 [15:32<41:30, 5.30s/it] {'loss': 0.597, 'grad_norm': 59.744529724121094, 'learning_rate': 4.373239415645323e-07, 'margin_dpo/margin_mean': 22.822023391723633, 'margin_dpo/margin_std': 20.654033660888672, 'logps/chosen': -91.70536041259766, 'logps/rejected': -122.416015625, 'logps/ref_chosen': -78.93235778808594, 'logps/ref_rejected': -86.82098388671875, 'logits/chosen': -0.6856693029403687, 'logits/rejected': -0.6315619945526123, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.822023391723633, 'margin_dpo/beta_margin_mean': 2.2822024822235107, 'margin_dpo/beta_margin_std': 2.1043384075164795, 'margin_dpo/beta_margin_grad_mean': -0.20305074751377106, 'margin_dpo/beta_margin_grad_std': 0.23186683654785156, 'epoch': 0.31} + 31%|████████████████████████▏ | 211/681 [15:32<41:30, 5.30s/it] 31%|████████████████████████▎ | 212/681 [15:35<35:15, 4.51s/it] {'loss': 0.4566, 'grad_norm': 49.052188873291016, 'learning_rate': 4.3647161031536086e-07, 'margin_dpo/margin_mean': 28.725915908813477, 'margin_dpo/margin_std': 24.575883865356445, 'logps/chosen': -70.00460052490234, 'logps/rejected': -143.5913543701172, 'logps/ref_chosen': -58.19701385498047, 'logps/ref_rejected': -103.05784606933594, 'logits/chosen': -0.6898226737976074, 'logits/rejected': -0.6595550775527954, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.725915908813477, 'margin_dpo/beta_margin_mean': 2.872591733932495, 'margin_dpo/beta_margin_std': 2.5697410106658936, 'margin_dpo/beta_margin_grad_mean': -0.17350082099437714, 'margin_dpo/beta_margin_grad_std': 0.19579628109931946, 'epoch': 0.31} + 31%|████████████████████████▎ | 212/681 [15:35<35:15, 4.51s/it] 31%|████████████████████████▍ | 213/681 [15:38<30:49, 3.95s/it] {'loss': 0.4856, 'grad_norm': 51.70877456665039, 'learning_rate': 4.3561436536583774e-07, 'margin_dpo/margin_mean': 29.106151580810547, 'margin_dpo/margin_std': 25.876745223999023, 'logps/chosen': -77.46551513671875, 'logps/rejected': -132.9736785888672, 'logps/ref_chosen': -67.51271057128906, 'logps/ref_rejected': -93.91471862792969, 'logits/chosen': -0.6648838520050049, 'logits/rejected': -0.6213950514793396, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.10615348815918, 'margin_dpo/beta_margin_mean': 2.9106154441833496, 'margin_dpo/beta_margin_std': 2.622797966003418, 'margin_dpo/beta_margin_grad_mean': -0.1718873530626297, 'margin_dpo/beta_margin_grad_std': 0.20221562683582306, 'epoch': 0.31} + 31%|████████████████████████▍ | 213/681 [15:38<30:49, 3.95s/it] 31%|████████████████████████▌ | 214/681 [15:40<26:54, 3.46s/it] {'loss': 0.6245, 'grad_norm': 59.337345123291016, 'learning_rate': 4.3475222930516473e-07, 'margin_dpo/margin_mean': 23.016742706298828, 'margin_dpo/margin_std': 24.032947540283203, 'logps/chosen': -52.264801025390625, 'logps/rejected': -111.19406127929688, 'logps/ref_chosen': -41.604888916015625, 'logps/ref_rejected': -77.51741027832031, 'logits/chosen': -0.6546872854232788, 'logits/rejected': -0.6387699842453003, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.016742706298828, 'margin_dpo/beta_margin_mean': 2.3016743659973145, 'margin_dpo/beta_margin_std': 2.411076784133911, 'margin_dpo/beta_margin_grad_mean': -0.21652430295944214, 'margin_dpo/beta_margin_grad_std': 0.2097923308610916, 'epoch': 0.31} + 31%|████████████████████████▌ | 214/681 [15:40<26:54, 3.46s/it] 32%|████████████████████████▋ | 215/681 [15:43<25:31, 3.29s/it] {'loss': 0.5351, 'grad_norm': 56.185760498046875, 'learning_rate': 4.3388522485142885e-07, 'margin_dpo/margin_mean': 24.892501831054688, 'margin_dpo/margin_std': 23.67044448852539, 'logps/chosen': -63.87721252441406, 'logps/rejected': -125.45509338378906, 'logps/ref_chosen': -53.279266357421875, 'logps/ref_rejected': -89.96464538574219, 'logits/chosen': -0.6533488035202026, 'logits/rejected': -0.624896228313446, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.892501831054688, 'margin_dpo/beta_margin_mean': 2.4892501831054688, 'margin_dpo/beta_margin_std': 2.3997206687927246, 'margin_dpo/beta_margin_grad_mean': -0.1931016743183136, 'margin_dpo/beta_margin_grad_std': 0.20534518361091614, 'epoch': 0.32} + 32%|████████████████████████▋ | 215/681 [15:43<25:31, 3.29s/it] 32%|████████████████████████▋ | 216/681 [15:46<24:52, 3.21s/it] {'loss': 0.5867, 'grad_norm': 63.23625564575195, 'learning_rate': 4.330133748510036e-07, 'margin_dpo/margin_mean': 26.958616256713867, 'margin_dpo/margin_std': 26.038864135742188, 'logps/chosen': -61.9163932800293, 'logps/rejected': -117.18614196777344, 'logps/ref_chosen': -48.887794494628906, 'logps/ref_rejected': -77.19892883300781, 'logits/chosen': -0.6673412919044495, 'logits/rejected': -0.6347181797027588, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.958616256713867, 'margin_dpo/beta_margin_mean': 2.695861577987671, 'margin_dpo/beta_margin_std': 2.648024797439575, 'margin_dpo/beta_margin_grad_mean': -0.2070668637752533, 'margin_dpo/beta_margin_grad_std': 0.22892099618911743, 'epoch': 0.32} + 32%|████████████████████████▋ | 216/681 [15:46<24:52, 3.21s/it] 32%|████████████████████████▊ | 217/681 [15:48<23:21, 3.02s/it] {'loss': 0.4057, 'grad_norm': 40.630271911621094, 'learning_rate': 4.3213670227794757e-07, 'margin_dpo/margin_mean': 27.929889678955078, 'margin_dpo/margin_std': 21.507854461669922, 'logps/chosen': -60.836097717285156, 'logps/rejected': -138.99900817871094, 'logps/ref_chosen': -49.845306396484375, 'logps/ref_rejected': -100.07832336425781, 'logits/chosen': -0.6815335750579834, 'logits/rejected': -0.6386614441871643, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.929887771606445, 'margin_dpo/beta_margin_mean': 2.7929890155792236, 'margin_dpo/beta_margin_std': 2.152282953262329, 'margin_dpo/beta_margin_grad_mean': -0.15030139684677124, 'margin_dpo/beta_margin_grad_std': 0.1869634985923767, 'epoch': 0.32} + 32%|████████████████████████▊ | 217/681 [15:48<23:21, 3.02s/it] 32%|████████████████████████▉ | 218/681 [15:51<22:34, 2.93s/it] {'loss': 0.5357, 'grad_norm': 54.89970397949219, 'learning_rate': 4.3125523023339815e-07, 'margin_dpo/margin_mean': 24.937423706054688, 'margin_dpo/margin_std': 23.69991683959961, 'logps/chosen': -69.97561645507812, 'logps/rejected': -124.18275451660156, 'logps/ref_chosen': -58.576683044433594, 'logps/ref_rejected': -87.84639739990234, 'logits/chosen': -0.6562488079071045, 'logits/rejected': -0.6250983476638794, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.937423706054688, 'margin_dpo/beta_margin_mean': 2.4937424659729004, 'margin_dpo/beta_margin_std': 2.4085628986358643, 'margin_dpo/beta_margin_grad_mean': -0.19920094311237335, 'margin_dpo/beta_margin_grad_std': 0.20679454505443573, 'epoch': 0.32} + 32%|████████████████████████▉ | 218/681 [15:51<22:34, 2.93s/it] 32%|█████████████████████████ | 219/681 [15:54<22:06, 2.87s/it] {'loss': 0.5267, 'grad_norm': 60.82085037231445, 'learning_rate': 4.303689819449636e-07, 'margin_dpo/margin_mean': 22.007417678833008, 'margin_dpo/margin_std': 19.649311065673828, 'logps/chosen': -72.4845962524414, 'logps/rejected': -119.23858642578125, 'logps/ref_chosen': -61.083858489990234, 'logps/ref_rejected': -85.83042907714844, 'logits/chosen': -0.6473067998886108, 'logits/rejected': -0.6150014400482178, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.007417678833008, 'margin_dpo/beta_margin_mean': 2.200741767883301, 'margin_dpo/beta_margin_std': 1.9767764806747437, 'margin_dpo/beta_margin_grad_mean': -0.19810640811920166, 'margin_dpo/beta_margin_grad_std': 0.19194501638412476, 'epoch': 0.32} + 32%|█████████████████████████ | 219/681 [15:54<22:06, 2.87s/it] 32%|█████████████████████████▏ | 220/681 [15:56<21:30, 2.80s/it] {'loss': 0.5029, 'grad_norm': 47.847412109375, 'learning_rate': 4.2947798076611047e-07, 'margin_dpo/margin_mean': 20.889970779418945, 'margin_dpo/margin_std': 16.6192626953125, 'logps/chosen': -81.12652587890625, 'logps/rejected': -119.67072296142578, 'logps/ref_chosen': -70.03128051757812, 'logps/ref_rejected': -87.68551635742188, 'logits/chosen': -0.6549057960510254, 'logits/rejected': -0.6138431429862976, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 20.889970779418945, 'margin_dpo/beta_margin_mean': 2.0889971256256104, 'margin_dpo/beta_margin_std': 1.714986801147461, 'margin_dpo/beta_margin_grad_mean': -0.19834579527378082, 'margin_dpo/beta_margin_grad_std': 0.180747851729393, 'epoch': 0.32} + 32%|█████████████████████████▏ | 220/681 [15:57<21:30, 2.80s/it] 32%|█████████████████████████▎ | 221/681 [15:59<20:53, 2.73s/it] {'loss': 0.339, 'grad_norm': 48.202903747558594, 'learning_rate': 4.285822501755485e-07, 'margin_dpo/margin_mean': 32.81925582885742, 'margin_dpo/margin_std': 22.837791442871094, 'logps/chosen': -64.44230651855469, 'logps/rejected': -151.5745391845703, 'logps/ref_chosen': -52.15470886230469, 'logps/ref_rejected': -106.46768188476562, 'logits/chosen': -0.6510884761810303, 'logits/rejected': -0.6392531394958496, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.81925582885742, 'margin_dpo/beta_margin_mean': 3.281925678253174, 'margin_dpo/beta_margin_std': 2.298614740371704, 'margin_dpo/beta_margin_grad_mean': -0.12460769712924957, 'margin_dpo/beta_margin_grad_std': 0.1816825121641159, 'epoch': 0.32} + 32%|█████████████████████████▎ | 221/681 [15:59<20:53, 2.73s/it] 33%|█████████████████████████▍ | 222/681 [16:02<20:49, 2.72s/it] {'loss': 0.5718, 'grad_norm': 76.29179382324219, 'learning_rate': 4.276818137766118e-07, 'margin_dpo/margin_mean': 26.146484375, 'margin_dpo/margin_std': 24.794532775878906, 'logps/chosen': -74.65057373046875, 'logps/rejected': -139.82711791992188, 'logps/ref_chosen': -60.971099853515625, 'logps/ref_rejected': -100.00115203857422, 'logits/chosen': -0.7204064130783081, 'logits/rejected': -0.6859586238861084, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.146484375, 'margin_dpo/beta_margin_mean': 2.6146483421325684, 'margin_dpo/beta_margin_std': 2.517277479171753, 'margin_dpo/beta_margin_grad_mean': -0.19307678937911987, 'margin_dpo/beta_margin_grad_std': 0.22420592606067657, 'epoch': 0.33} + 33%|█████████████████████████▍ | 222/681 [16:02<20:49, 2.72s/it] 33%|█████████████████████████▌ | 223/681 [16:04<19:38, 2.57s/it] {'loss': 0.738, 'grad_norm': 78.53938293457031, 'learning_rate': 4.2677669529663686e-07, 'margin_dpo/margin_mean': 22.292381286621094, 'margin_dpo/margin_std': 22.842185974121094, 'logps/chosen': -68.55857849121094, 'logps/rejected': -121.03541564941406, 'logps/ref_chosen': -52.64057922363281, 'logps/ref_rejected': -82.82502746582031, 'logits/chosen': -0.7086101770401001, 'logits/rejected': -0.6605532169342041, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.292381286621094, 'margin_dpo/beta_margin_mean': 2.229238271713257, 'margin_dpo/beta_margin_std': 2.2963743209838867, 'margin_dpo/beta_margin_grad_mean': -0.22235842049121857, 'margin_dpo/beta_margin_grad_std': 0.26257219910621643, 'epoch': 0.33} + 33%|█████████████████████████▌ | 223/681 [16:04<19:38, 2.57s/it] 33%|█████████████████████████▋ | 224/681 [16:06<18:35, 2.44s/it] {'loss': 0.5673, 'grad_norm': 74.9097671508789, 'learning_rate': 4.2586691858633747e-07, 'margin_dpo/margin_mean': 26.778461456298828, 'margin_dpo/margin_std': 24.951221466064453, 'logps/chosen': -61.69850158691406, 'logps/rejected': -116.998046875, 'logps/ref_chosen': -48.59540939331055, 'logps/ref_rejected': -77.11648559570312, 'logits/chosen': -0.6751635074615479, 'logits/rejected': -0.6340160369873047, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.778461456298828, 'margin_dpo/beta_margin_mean': 2.6778461933135986, 'margin_dpo/beta_margin_std': 2.5320727825164795, 'margin_dpo/beta_margin_grad_mean': -0.19054511189460754, 'margin_dpo/beta_margin_grad_std': 0.2218094766139984, 'epoch': 0.33} + 33%|█████████████████████████▋ | 224/681 [16:06<18:35, 2.44s/it] 33%|█████████████████████████▊ | 225/681 [16:08<18:15, 2.40s/it] {'loss': 0.4077, 'grad_norm': 43.42683792114258, 'learning_rate': 4.249525076191759e-07, 'margin_dpo/margin_mean': 32.95307922363281, 'margin_dpo/margin_std': 26.78388214111328, 'logps/chosen': -72.66340637207031, 'logps/rejected': -147.5189208984375, 'logps/ref_chosen': -58.000465393066406, 'logps/ref_rejected': -99.90290832519531, 'logits/chosen': -0.6741304397583008, 'logits/rejected': -0.6419914960861206, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.95307922363281, 'margin_dpo/beta_margin_mean': 3.2953078746795654, 'margin_dpo/beta_margin_std': 2.6787664890289307, 'margin_dpo/beta_margin_grad_mean': -0.14978624880313873, 'margin_dpo/beta_margin_grad_std': 0.203273743391037, 'epoch': 0.33} + 33%|█████████████████████████▊ | 225/681 [16:08<18:15, 2.40s/it] 33%|█████████████████████████▉ | 226/681 [16:11<19:03, 2.51s/it] {'loss': 0.4851, 'grad_norm': 51.3669548034668, 'learning_rate': 4.2403348649073167e-07, 'margin_dpo/margin_mean': 25.563575744628906, 'margin_dpo/margin_std': 21.131916046142578, 'logps/chosen': -69.51836395263672, 'logps/rejected': -114.87089538574219, 'logps/ref_chosen': -58.898799896240234, 'logps/ref_rejected': -78.68775939941406, 'logits/chosen': -0.6864838600158691, 'logits/rejected': -0.6341279745101929, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.563575744628906, 'margin_dpo/beta_margin_mean': 2.5563576221466064, 'margin_dpo/beta_margin_std': 2.2327442169189453, 'margin_dpo/beta_margin_grad_mean': -0.17993833124637604, 'margin_dpo/beta_margin_grad_std': 0.19534794986248016, 'epoch': 0.33} + 33%|█████████████████████████▉ | 226/681 [16:11<19:03, 2.51s/it] 33%|██████████████████████████ | 227/681 [16:14<19:00, 2.51s/it] {'loss': 0.4141, 'grad_norm': 48.44467544555664, 'learning_rate': 4.2310987941806615e-07, 'margin_dpo/margin_mean': 31.536819458007812, 'margin_dpo/margin_std': 26.029647827148438, 'logps/chosen': -70.79923248291016, 'logps/rejected': -142.67623901367188, 'logps/ref_chosen': -59.072181701660156, 'logps/ref_rejected': -99.41236877441406, 'logits/chosen': -0.6759487390518188, 'logits/rejected': -0.6457496881484985, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.53681755065918, 'margin_dpo/beta_margin_mean': 3.153681993484497, 'margin_dpo/beta_margin_std': 2.6311187744140625, 'margin_dpo/beta_margin_grad_mean': -0.15354523062705994, 'margin_dpo/beta_margin_grad_std': 0.1981254369020462, 'epoch': 0.33} + 33%|██████████████████████████ | 227/681 [16:14<19:00, 2.51s/it] 33%|██████████████████████████ | 228/681 [16:17<20:26, 2.71s/it] {'loss': 0.5336, 'grad_norm': 55.06504821777344, 'learning_rate': 4.2218171073908463e-07, 'margin_dpo/margin_mean': 24.27182960510254, 'margin_dpo/margin_std': 20.79153823852539, 'logps/chosen': -78.89456176757812, 'logps/rejected': -128.3238525390625, 'logps/ref_chosen': -65.89129638671875, 'logps/ref_rejected': -91.04875183105469, 'logits/chosen': -0.6563422679901123, 'logits/rejected': -0.6227169036865234, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.27182960510254, 'margin_dpo/beta_margin_mean': 2.427182912826538, 'margin_dpo/beta_margin_std': 2.091808795928955, 'margin_dpo/beta_margin_grad_mean': -0.18722449243068695, 'margin_dpo/beta_margin_grad_std': 0.2196025550365448, 'epoch': 0.33} + 33%|██████████████████████████ | 228/681 [16:17<20:26, 2.71s/it] 34%|██████████████████████████▏ | 229/681 [16:19<19:50, 2.63s/it] {'loss': 0.598, 'grad_norm': 64.2571792602539, 'learning_rate': 4.212490049118951e-07, 'margin_dpo/margin_mean': 27.103031158447266, 'margin_dpo/margin_std': 25.39706802368164, 'logps/chosen': -85.07996368408203, 'logps/rejected': -126.00403594970703, 'logps/ref_chosen': -70.70636749267578, 'logps/ref_rejected': -84.52740478515625, 'logits/chosen': -0.6885573863983154, 'logits/rejected': -0.6359836459159851, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.1030330657959, 'margin_dpo/beta_margin_mean': 2.71030330657959, 'margin_dpo/beta_margin_std': 2.555929660797119, 'margin_dpo/beta_margin_grad_mean': -0.1898403912782669, 'margin_dpo/beta_margin_grad_std': 0.23285524547100067, 'epoch': 0.34} + 34%|██████████████████████████▏ | 229/681 [16:19<19:50, 2.63s/it] 34%|██████████████████████████▎ | 230/681 [16:22<19:16, 2.56s/it] {'loss': 0.5067, 'grad_norm': 50.68180465698242, 'learning_rate': 4.203117865141635e-07, 'margin_dpo/margin_mean': 30.37428092956543, 'margin_dpo/margin_std': 27.84336280822754, 'logps/chosen': -51.398292541503906, 'logps/rejected': -128.11248779296875, 'logps/ref_chosen': -39.282005310058594, 'logps/ref_rejected': -85.62191009521484, 'logits/chosen': -0.6804044842720032, 'logits/rejected': -0.6706264019012451, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.37428092956543, 'margin_dpo/beta_margin_mean': 3.037428140640259, 'margin_dpo/beta_margin_std': 2.7951576709747314, 'margin_dpo/beta_margin_grad_mean': -0.16133855283260345, 'margin_dpo/beta_margin_grad_std': 0.21256163716316223, 'epoch': 0.34} + 34%|██████████████████████████▎ | 230/681 [16:22<19:16, 2.56s/it] 34%|██████████████████████████▍ | 231/681 [16:24<19:31, 2.60s/it] {'loss': 0.4698, 'grad_norm': 42.53703689575195, 'learning_rate': 4.1937008024246625e-07, 'margin_dpo/margin_mean': 26.028377532958984, 'margin_dpo/margin_std': 24.996898651123047, 'logps/chosen': -74.62582397460938, 'logps/rejected': -111.50166320800781, 'logps/ref_chosen': -63.27644348144531, 'logps/ref_rejected': -74.1239013671875, 'logits/chosen': -0.6829984188079834, 'logits/rejected': -0.6394829750061035, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.028377532958984, 'margin_dpo/beta_margin_mean': 2.6028378009796143, 'margin_dpo/beta_margin_std': 2.508455276489258, 'margin_dpo/beta_margin_grad_mean': -0.18761104345321655, 'margin_dpo/beta_margin_grad_std': 0.17357850074768066, 'epoch': 0.34} + 34%|██████████████████████████▍ | 231/681 [16:24<19:31, 2.60s/it] 34%|██████████████████████████▌ | 232/681 [16:27<19:55, 2.66s/it] {'loss': 0.6921, 'grad_norm': 70.08275604248047, 'learning_rate': 4.1842391091163933e-07, 'margin_dpo/margin_mean': 21.211563110351562, 'margin_dpo/margin_std': 22.4114933013916, 'logps/chosen': -84.29617309570312, 'logps/rejected': -118.73604583740234, 'logps/ref_chosen': -70.74876403808594, 'logps/ref_rejected': -83.97706604003906, 'logits/chosen': -0.6572903394699097, 'logits/rejected': -0.5994934439659119, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 21.211563110351562, 'margin_dpo/beta_margin_mean': 2.1211562156677246, 'margin_dpo/beta_margin_std': 2.3135242462158203, 'margin_dpo/beta_margin_grad_mean': -0.2373134195804596, 'margin_dpo/beta_margin_grad_std': 0.24425449967384338, 'epoch': 0.34} + 34%|██████████████████████████▌ | 232/681 [16:27<19:55, 2.66s/it] 34%|██████████████████████████▋ | 233/681 [16:30<20:07, 2.69s/it] {'loss': 0.5602, 'grad_norm': 61.6278076171875, 'learning_rate': 4.174733034541245e-07, 'margin_dpo/margin_mean': 27.884885787963867, 'margin_dpo/margin_std': 26.062320709228516, 'logps/chosen': -67.88652801513672, 'logps/rejected': -148.36856079101562, 'logps/ref_chosen': -54.8829345703125, 'logps/ref_rejected': -107.48007202148438, 'logits/chosen': -0.6890474557876587, 'logits/rejected': -0.6643567085266113, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.8848876953125, 'margin_dpo/beta_margin_mean': 2.7884888648986816, 'margin_dpo/beta_margin_std': 2.659421682357788, 'margin_dpo/beta_margin_grad_mean': -0.18775954842567444, 'margin_dpo/beta_margin_grad_std': 0.23140782117843628, 'epoch': 0.34} + 34%|██████████████████████████▋ | 233/681 [16:30<20:07, 2.69s/it] 34%|██████████████████████████▊ | 234/681 [16:33<20:06, 2.70s/it] {'loss': 0.4561, 'grad_norm': 60.47285461425781, 'learning_rate': 4.165182829193126e-07, 'margin_dpo/margin_mean': 28.14922332763672, 'margin_dpo/margin_std': 21.847400665283203, 'logps/chosen': -54.90777587890625, 'logps/rejected': -138.9691162109375, 'logps/ref_chosen': -44.09451675415039, 'logps/ref_rejected': -100.00663757324219, 'logits/chosen': -0.6370252370834351, 'logits/rejected': -0.6381373405456543, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.14922523498535, 'margin_dpo/beta_margin_mean': 2.814922571182251, 'margin_dpo/beta_margin_std': 2.241670846939087, 'margin_dpo/beta_margin_grad_mean': -0.15120825171470642, 'margin_dpo/beta_margin_grad_std': 0.1929025799036026, 'epoch': 0.34} + 34%|██████████████████████████▊ | 234/681 [16:33<20:06, 2.70s/it] 35%|██████████████████████████▉ | 235/681 [16:35<19:30, 2.62s/it] {'loss': 0.5974, 'grad_norm': 63.21758270263672, 'learning_rate': 4.1555887447288255e-07, 'margin_dpo/margin_mean': 22.86014175415039, 'margin_dpo/margin_std': 22.919218063354492, 'logps/chosen': -77.54314422607422, 'logps/rejected': -128.5604248046875, 'logps/ref_chosen': -62.237911224365234, 'logps/ref_rejected': -90.39505767822266, 'logits/chosen': -0.6568065881729126, 'logits/rejected': -0.614643931388855, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.860143661499023, 'margin_dpo/beta_margin_mean': 2.2860143184661865, 'margin_dpo/beta_margin_std': 2.296712875366211, 'margin_dpo/beta_margin_grad_mean': -0.215665802359581, 'margin_dpo/beta_margin_grad_std': 0.2162242829799652, 'epoch': 0.35} + 35%|██████████████████████████▉ | 235/681 [16:35<19:30, 2.62s/it] 35%|███████████████████████████ | 236/681 [16:38<19:41, 2.65s/it] {'loss': 0.5646, 'grad_norm': 65.25566864013672, 'learning_rate': 4.1459510339613946e-07, 'margin_dpo/margin_mean': 25.488582611083984, 'margin_dpo/margin_std': 23.585155487060547, 'logps/chosen': -60.41249084472656, 'logps/rejected': -140.07135009765625, 'logps/ref_chosen': -49.34136199951172, 'logps/ref_rejected': -103.51162719726562, 'logits/chosen': -0.6559075117111206, 'logits/rejected': -0.6537389159202576, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.488582611083984, 'margin_dpo/beta_margin_mean': 2.548858165740967, 'margin_dpo/beta_margin_std': 2.3670222759246826, 'margin_dpo/beta_margin_grad_mean': -0.19891716539859772, 'margin_dpo/beta_margin_grad_std': 0.22555947303771973, 'epoch': 0.35} + 35%|███████████████████████████ | 236/681 [16:38<19:41, 2.65s/it] 35%|███████████████████████████▏ | 237/681 [16:41<19:49, 2.68s/it] {'loss': 0.5116, 'grad_norm': 48.03404235839844, 'learning_rate': 4.136269950853473e-07, 'margin_dpo/margin_mean': 27.52564811706543, 'margin_dpo/margin_std': 24.07387924194336, 'logps/chosen': -65.91875457763672, 'logps/rejected': -134.05665588378906, 'logps/ref_chosen': -54.168121337890625, 'logps/ref_rejected': -94.78036499023438, 'logits/chosen': -0.6702800989151001, 'logits/rejected': -0.636156439781189, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.52564811706543, 'margin_dpo/beta_margin_mean': 2.7525649070739746, 'margin_dpo/beta_margin_std': 2.4088451862335205, 'margin_dpo/beta_margin_grad_mean': -0.17360197007656097, 'margin_dpo/beta_margin_grad_std': 0.21111546456813812, 'epoch': 0.35} + 35%|███████████████████████████▏ | 237/681 [16:41<19:49, 2.68s/it] 35%|███████████████████████████▎ | 238/681 [16:43<20:07, 2.72s/it] {'loss': 0.4407, 'grad_norm': 39.46758270263672, 'learning_rate': 4.126545750510605e-07, 'margin_dpo/margin_mean': 24.640880584716797, 'margin_dpo/margin_std': 20.111305236816406, 'logps/chosen': -64.94898986816406, 'logps/rejected': -125.03469848632812, 'logps/ref_chosen': -53.973121643066406, 'logps/ref_rejected': -89.41795349121094, 'logits/chosen': -0.6305921077728271, 'logits/rejected': -0.6234115958213806, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.640880584716797, 'margin_dpo/beta_margin_mean': 2.464088201522827, 'margin_dpo/beta_margin_std': 2.025897979736328, 'margin_dpo/beta_margin_grad_mean': -0.17112761735916138, 'margin_dpo/beta_margin_grad_std': 0.17932020127773285, 'epoch': 0.35} + 35%|███████████████████████████▎ | 238/681 [16:43<20:07, 2.72s/it] 35%|███████████████████████████▎ | 239/681 [16:46<19:36, 2.66s/it] {'loss': 0.4436, 'grad_norm': 49.3748664855957, 'learning_rate': 4.116778689174514e-07, 'margin_dpo/margin_mean': 25.54654312133789, 'margin_dpo/margin_std': 19.89307975769043, 'logps/chosen': -70.67376708984375, 'logps/rejected': -131.71542358398438, 'logps/ref_chosen': -58.09782409667969, 'logps/ref_rejected': -93.59294128417969, 'logits/chosen': -0.7114957571029663, 'logits/rejected': -0.6843305826187134, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.54654312133789, 'margin_dpo/beta_margin_mean': 2.554654359817505, 'margin_dpo/beta_margin_std': 2.115661144256592, 'margin_dpo/beta_margin_grad_mean': -0.16616235673427582, 'margin_dpo/beta_margin_grad_std': 0.18609200417995453, 'epoch': 0.35} + 35%|███████████████████████████▎ | 239/681 [16:46<19:36, 2.66s/it] 35%|███████████████████████████▍ | 240/681 [16:49<19:27, 2.65s/it] {'loss': 0.6257, 'grad_norm': 60.53359603881836, 'learning_rate': 4.106969024216348e-07, 'margin_dpo/margin_mean': 22.555252075195312, 'margin_dpo/margin_std': 20.787620544433594, 'logps/chosen': -73.52519226074219, 'logps/rejected': -109.58448791503906, 'logps/ref_chosen': -60.6144905090332, 'logps/ref_rejected': -74.1185302734375, 'logits/chosen': -0.6911687850952148, 'logits/rejected': -0.6599963903427124, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 22.555252075195312, 'margin_dpo/beta_margin_mean': 2.2555253505706787, 'margin_dpo/beta_margin_std': 2.1107068061828613, 'margin_dpo/beta_margin_grad_mean': -0.2058243602514267, 'margin_dpo/beta_margin_grad_std': 0.2343757450580597, 'epoch': 0.35} + 35%|███████████████████████████▍ | 240/681 [16:49<19:27, 2.65s/it] 35%|███████████████████████████▌ | 241/681 [16:51<19:12, 2.62s/it] {'loss': 0.5099, 'grad_norm': 59.422630310058594, 'learning_rate': 4.097117014129903e-07, 'margin_dpo/margin_mean': 31.69961929321289, 'margin_dpo/margin_std': 29.628376007080078, 'logps/chosen': -76.52700805664062, 'logps/rejected': -130.19644165039062, 'logps/ref_chosen': -66.091064453125, 'logps/ref_rejected': -88.06088256835938, 'logits/chosen': -0.6552136540412903, 'logits/rejected': -0.6012428998947144, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.69961929321289, 'margin_dpo/beta_margin_mean': 3.169961929321289, 'margin_dpo/beta_margin_std': 3.125377655029297, 'margin_dpo/beta_margin_grad_mean': -0.1663733720779419, 'margin_dpo/beta_margin_grad_std': 0.22269202768802643, 'epoch': 0.35} + 35%|███████████████████████████▌ | 241/681 [16:51<19:12, 2.62s/it] 36%|███████████████████████████▋ | 242/681 [16:54<18:49, 2.57s/it] {'loss': 0.4934, 'grad_norm': 52.94541931152344, 'learning_rate': 4.087222918524807e-07, 'margin_dpo/margin_mean': 24.644126892089844, 'margin_dpo/margin_std': 21.64803123474121, 'logps/chosen': -79.44197845458984, 'logps/rejected': -119.58251190185547, 'logps/ref_chosen': -67.86392211914062, 'logps/ref_rejected': -83.36033630371094, 'logits/chosen': -0.6454315185546875, 'logits/rejected': -0.6136279702186584, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.644126892089844, 'margin_dpo/beta_margin_mean': 2.4644126892089844, 'margin_dpo/beta_margin_std': 2.184521198272705, 'margin_dpo/beta_margin_grad_mean': -0.1810285896062851, 'margin_dpo/beta_margin_grad_std': 0.19746260344982147, 'epoch': 0.36} + 36%|███████████████████████████▋ | 242/681 [16:54<18:49, 2.57s/it] 36%|███████████████████████████▊ | 243/681 [16:56<18:41, 2.56s/it] {'loss': 0.3271, 'grad_norm': 34.107791900634766, 'learning_rate': 4.07728699811968e-07, 'margin_dpo/margin_mean': 29.49079132080078, 'margin_dpo/margin_std': 21.805618286132812, 'logps/chosen': -74.12469482421875, 'logps/rejected': -116.86687469482422, 'logps/ref_chosen': -63.08424377441406, 'logps/ref_rejected': -76.33563232421875, 'logits/chosen': -0.6725857257843018, 'logits/rejected': -0.6084048748016357, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.49078941345215, 'margin_dpo/beta_margin_mean': 2.9490790367126465, 'margin_dpo/beta_margin_std': 2.1849277019500732, 'margin_dpo/beta_margin_grad_mean': -0.13316328823566437, 'margin_dpo/beta_margin_grad_std': 0.1522829383611679, 'epoch': 0.36} + 36%|███████████████████████████▊ | 243/681 [16:56<18:41, 2.56s/it] 36%|███████████████████████████▉ | 244/681 [16:59<18:40, 2.56s/it] {'loss': 0.4934, 'grad_norm': 42.87071228027344, 'learning_rate': 4.067309514735267e-07, 'margin_dpo/margin_mean': 25.319198608398438, 'margin_dpo/margin_std': 21.36996078491211, 'logps/chosen': -71.2780990600586, 'logps/rejected': -130.34854125976562, 'logps/ref_chosen': -61.14069366455078, 'logps/ref_rejected': -94.89193725585938, 'logits/chosen': -0.6881895065307617, 'logits/rejected': -0.6778185367584229, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.319198608398438, 'margin_dpo/beta_margin_mean': 2.5319199562072754, 'margin_dpo/beta_margin_std': 2.150766372680664, 'margin_dpo/beta_margin_grad_mean': -0.18467824161052704, 'margin_dpo/beta_margin_grad_std': 0.20196670293807983, 'epoch': 0.36} + 36%|███████████████████████████▉ | 244/681 [16:59<18:40, 2.56s/it] 36%|████████████████████████████ | 245/681 [17:01<19:06, 2.63s/it] {'loss': 0.5326, 'grad_norm': 74.6055679321289, 'learning_rate': 4.057290731287531e-07, 'margin_dpo/margin_mean': 26.900461196899414, 'margin_dpo/margin_std': 25.503192901611328, 'logps/chosen': -78.92977905273438, 'logps/rejected': -126.20805358886719, 'logps/ref_chosen': -67.26228332519531, 'logps/ref_rejected': -87.64010620117188, 'logits/chosen': -0.7033660411834717, 'logits/rejected': -0.6514378786087036, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.90045928955078, 'margin_dpo/beta_margin_mean': 2.6900460720062256, 'margin_dpo/beta_margin_std': 2.7254021167755127, 'margin_dpo/beta_margin_grad_mean': -0.1932898312807083, 'margin_dpo/beta_margin_grad_std': 0.2051628977060318, 'epoch': 0.36} + 36%|████████████████████████████ | 245/681 [17:02<19:06, 2.63s/it] 36%|████████████████████████████▏ | 246/681 [17:04<19:01, 2.62s/it] {'loss': 0.5288, 'grad_norm': 56.00790023803711, 'learning_rate': 4.047230911780736e-07, 'margin_dpo/margin_mean': 23.103233337402344, 'margin_dpo/margin_std': 21.10454559326172, 'logps/chosen': -78.0211181640625, 'logps/rejected': -118.77372741699219, 'logps/ref_chosen': -66.69696807861328, 'logps/ref_rejected': -84.34634399414062, 'logits/chosen': -0.7089934945106506, 'logits/rejected': -0.6705622673034668, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.103235244750977, 'margin_dpo/beta_margin_mean': 2.310323476791382, 'margin_dpo/beta_margin_std': 2.1114535331726074, 'margin_dpo/beta_margin_grad_mean': -0.1983981728553772, 'margin_dpo/beta_margin_grad_std': 0.19785138964653015, 'epoch': 0.36} + 36%|████████████████████████████▏ | 246/681 [17:04<19:01, 2.62s/it] 36%|████████████████████████████▎ | 247/681 [17:07<18:42, 2.59s/it] {'loss': 0.4045, 'grad_norm': 41.90789031982422, 'learning_rate': 4.0371303213004814e-07, 'margin_dpo/margin_mean': 32.50457000732422, 'margin_dpo/margin_std': 25.436208724975586, 'logps/chosen': -68.0724868774414, 'logps/rejected': -150.26498413085938, 'logps/ref_chosen': -56.6053466796875, 'logps/ref_rejected': -106.29327392578125, 'logits/chosen': -0.7110755443572998, 'logits/rejected': -0.6894150972366333, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.50457000732422, 'margin_dpo/beta_margin_mean': 3.2504570484161377, 'margin_dpo/beta_margin_std': 2.560331344604492, 'margin_dpo/beta_margin_grad_mean': -0.14395716786384583, 'margin_dpo/beta_margin_grad_std': 0.19858244061470032, 'epoch': 0.36} + 36%|████████████████████████████▎ | 247/681 [17:07<18:42, 2.59s/it] 36%|████████████████████████████▍ | 248/681 [17:09<18:36, 2.58s/it] {'loss': 0.4107, 'grad_norm': 42.92959213256836, 'learning_rate': 4.0269892260067197e-07, 'margin_dpo/margin_mean': 24.35607147216797, 'margin_dpo/margin_std': 19.101226806640625, 'logps/chosen': -54.540321350097656, 'logps/rejected': -126.71005249023438, 'logps/ref_chosen': -44.043216705322266, 'logps/ref_rejected': -91.85687255859375, 'logits/chosen': -0.6845219135284424, 'logits/rejected': -0.6683632135391235, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.35607147216797, 'margin_dpo/beta_margin_mean': 2.4356071949005127, 'margin_dpo/beta_margin_std': 1.9233942031860352, 'margin_dpo/beta_margin_grad_mean': -0.16844278573989868, 'margin_dpo/beta_margin_grad_std': 0.15559379756450653, 'epoch': 0.36} + 36%|████████████████████████████▍ | 248/681 [17:09<18:36, 2.58s/it] 37%|████████████████████████████▌ | 249/681 [17:12<18:50, 2.62s/it] {'loss': 0.6535, 'grad_norm': 59.13127517700195, 'learning_rate': 4.0168078931267426e-07, 'margin_dpo/margin_mean': 20.92279624938965, 'margin_dpo/margin_std': 20.69894790649414, 'logps/chosen': -74.95724487304688, 'logps/rejected': -113.90575408935547, 'logps/ref_chosen': -62.442352294921875, 'logps/ref_rejected': -80.46806335449219, 'logits/chosen': -0.7052150964736938, 'logits/rejected': -0.6669450998306274, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 20.92279624938965, 'margin_dpo/beta_margin_mean': 2.0922796726226807, 'margin_dpo/beta_margin_std': 2.0924148559570312, 'margin_dpo/beta_margin_grad_mean': -0.23194736242294312, 'margin_dpo/beta_margin_grad_std': 0.2308819442987442, 'epoch': 0.37} + 37%|████████████████████████████▌ | 249/681 [17:12<18:50, 2.62s/it] 37%|████████████████████████████▋ | 250/681 [17:15<19:32, 2.72s/it] {'loss': 0.4359, 'grad_norm': 34.11585235595703, 'learning_rate': 4.006586590948141e-07, 'margin_dpo/margin_mean': 25.81413459777832, 'margin_dpo/margin_std': 18.1165828704834, 'logps/chosen': -74.52294158935547, 'logps/rejected': -108.57221221923828, 'logps/ref_chosen': -65.6366958618164, 'logps/ref_rejected': -73.87183380126953, 'logits/chosen': -0.6944586038589478, 'logits/rejected': -0.6244109272956848, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.81413459777832, 'margin_dpo/beta_margin_mean': 2.581413507461548, 'margin_dpo/beta_margin_std': 1.8444185256958008, 'margin_dpo/beta_margin_grad_mean': -0.1552935689687729, 'margin_dpo/beta_margin_grad_std': 0.19923508167266846, 'epoch': 0.37} + 37%|████████████████████████████▋ | 250/681 [17:15<19:32, 2.72s/it] 37%|████████████████████████████▋ | 251/681 [17:17<18:53, 2.64s/it] {'loss': 0.4579, 'grad_norm': 44.37178039550781, 'learning_rate': 3.9963255888117325e-07, 'margin_dpo/margin_mean': 25.74152183532715, 'margin_dpo/margin_std': 20.57958984375, 'logps/chosen': -70.05519104003906, 'logps/rejected': -116.27742767333984, 'logps/ref_chosen': -57.182716369628906, 'logps/ref_rejected': -77.66343688964844, 'logits/chosen': -0.7029905319213867, 'logits/rejected': -0.6486064195632935, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.74152183532715, 'margin_dpo/beta_margin_mean': 2.5741522312164307, 'margin_dpo/beta_margin_std': 2.079760789871216, 'margin_dpo/beta_margin_grad_mean': -0.1774299144744873, 'margin_dpo/beta_margin_grad_std': 0.19099289178848267, 'epoch': 0.37} + 37%|████████████████████████████▋ | 251/681 [17:17<18:53, 2.64s/it] 37%|████████████████████████████▊ | 252/681 [17:20<18:51, 2.64s/it] {'loss': 0.4309, 'grad_norm': 53.544761657714844, 'learning_rate': 3.9860251571044666e-07, 'margin_dpo/margin_mean': 25.683515548706055, 'margin_dpo/margin_std': 19.43787384033203, 'logps/chosen': -83.42109680175781, 'logps/rejected': -122.17694854736328, 'logps/ref_chosen': -71.68563842773438, 'logps/ref_rejected': -84.75798797607422, 'logits/chosen': -0.6703172326087952, 'logits/rejected': -0.62431800365448, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.683515548706055, 'margin_dpo/beta_margin_mean': 2.5683515071868896, 'margin_dpo/beta_margin_std': 1.9699735641479492, 'margin_dpo/beta_margin_grad_mean': -0.15842175483703613, 'margin_dpo/beta_margin_grad_std': 0.1866365671157837, 'epoch': 0.37} + 37%|████████████████████████████▊ | 252/681 [17:20<18:51, 2.64s/it] 37%|████████████████████████████▉ | 253/681 [17:23<18:52, 2.65s/it] {'loss': 0.6253, 'grad_norm': 50.1516227722168, 'learning_rate': 3.9756855672522986e-07, 'margin_dpo/margin_mean': 24.096176147460938, 'margin_dpo/margin_std': 22.95254135131836, 'logps/chosen': -79.20399475097656, 'logps/rejected': -132.8687744140625, 'logps/ref_chosen': -69.13392639160156, 'logps/ref_rejected': -98.70252990722656, 'logits/chosen': -0.6842066049575806, 'logits/rejected': -0.654214084148407, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.096176147460938, 'margin_dpo/beta_margin_mean': 2.4096176624298096, 'margin_dpo/beta_margin_std': 2.3356130123138428, 'margin_dpo/beta_margin_grad_mean': -0.2049117386341095, 'margin_dpo/beta_margin_grad_std': 0.23132526874542236, 'epoch': 0.37} + 37%|████████████████████████████▉ | 253/681 [17:23<18:52, 2.65s/it] 37%|█████████████████████████████ | 254/681 [17:25<19:02, 2.68s/it] {'loss': 0.5557, 'grad_norm': 64.96926879882812, 'learning_rate': 3.965307091713037e-07, 'margin_dpo/margin_mean': 24.636829376220703, 'margin_dpo/margin_std': 22.574371337890625, 'logps/chosen': -64.82050323486328, 'logps/rejected': -125.6099853515625, 'logps/ref_chosen': -54.154998779296875, 'logps/ref_rejected': -90.30764770507812, 'logits/chosen': -0.7051047682762146, 'logits/rejected': -0.6575514078140259, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.636829376220703, 'margin_dpo/beta_margin_mean': 2.4636828899383545, 'margin_dpo/beta_margin_std': 2.2709455490112305, 'margin_dpo/beta_margin_grad_mean': -0.19986601173877716, 'margin_dpo/beta_margin_grad_std': 0.2192426323890686, 'epoch': 0.37} + 37%|█████████████████████████████ | 254/681 [17:25<19:02, 2.68s/it] 37%|█████████████████████████████▏ | 255/681 [17:28<19:01, 2.68s/it] {'loss': 0.6594, 'grad_norm': 66.39599609375, 'learning_rate': 3.954890003969163e-07, 'margin_dpo/margin_mean': 27.34168243408203, 'margin_dpo/margin_std': 28.012739181518555, 'logps/chosen': -70.39166259765625, 'logps/rejected': -130.80026245117188, 'logps/ref_chosen': -57.14167022705078, 'logps/ref_rejected': -90.2085952758789, 'logits/chosen': -0.7102745771408081, 'logits/rejected': -0.6797518730163574, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.34168243408203, 'margin_dpo/beta_margin_mean': 2.734168291091919, 'margin_dpo/beta_margin_std': 2.8561336994171143, 'margin_dpo/beta_margin_grad_mean': -0.1914447396993637, 'margin_dpo/beta_margin_grad_std': 0.22745780646800995, 'epoch': 0.37} + 37%|█████████████████████████████▏ | 255/681 [17:28<19:01, 2.68s/it] 38%|█████████████████████████████▎ | 256/681 [17:31<19:07, 2.70s/it] {'loss': 0.5121, 'grad_norm': 58.85321807861328, 'learning_rate': 3.944434578520628e-07, 'margin_dpo/margin_mean': 27.34583854675293, 'margin_dpo/margin_std': 25.045982360839844, 'logps/chosen': -68.35701751708984, 'logps/rejected': -133.102294921875, 'logps/ref_chosen': -55.163490295410156, 'logps/ref_rejected': -92.56291961669922, 'logits/chosen': -0.6565215587615967, 'logits/rejected': -0.6265472769737244, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.34583854675293, 'margin_dpo/beta_margin_mean': 2.734584093093872, 'margin_dpo/beta_margin_std': 2.515676498413086, 'margin_dpo/beta_margin_grad_mean': -0.17351847887039185, 'margin_dpo/beta_margin_grad_std': 0.20426101982593536, 'epoch': 0.38} + 38%|█████████████████████████████▎ | 256/681 [17:31<19:07, 2.70s/it] 38%|█████████████████████████████▍ | 257/681 [17:33<19:00, 2.69s/it] {'loss': 0.5015, 'grad_norm': 45.65961456298828, 'learning_rate': 3.933941090877615e-07, 'margin_dpo/margin_mean': 30.103281021118164, 'margin_dpo/margin_std': 25.718414306640625, 'logps/chosen': -61.90161895751953, 'logps/rejected': -122.11911010742188, 'logps/ref_chosen': -49.4236946105957, 'logps/ref_rejected': -79.53791809082031, 'logits/chosen': -0.6682260036468506, 'logits/rejected': -0.6451402902603149, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.103281021118164, 'margin_dpo/beta_margin_mean': 3.0103280544281006, 'margin_dpo/beta_margin_std': 2.688237190246582, 'margin_dpo/beta_margin_grad_mean': -0.17995263636112213, 'margin_dpo/beta_margin_grad_std': 0.22144293785095215, 'epoch': 0.38} + 38%|█████████████████████████████▍ | 257/681 [17:33<19:00, 2.69s/it] 38%|█████████████████████████████▌ | 258/681 [17:36<18:07, 2.57s/it] {'loss': 0.7407, 'grad_norm': 90.31965637207031, 'learning_rate': 3.923409817553284e-07, 'margin_dpo/margin_mean': 26.426227569580078, 'margin_dpo/margin_std': 27.302228927612305, 'logps/chosen': -75.35392761230469, 'logps/rejected': -138.38613891601562, 'logps/ref_chosen': -59.384124755859375, 'logps/ref_rejected': -95.9901123046875, 'logits/chosen': -0.6991258263587952, 'logits/rejected': -0.669155478477478, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.426227569580078, 'margin_dpo/beta_margin_mean': 2.642622947692871, 'margin_dpo/beta_margin_std': 2.7432861328125, 'margin_dpo/beta_margin_grad_mean': -0.2009788304567337, 'margin_dpo/beta_margin_grad_std': 0.24991276860237122, 'epoch': 0.38} + 38%|█████████████████████████████▌ | 258/681 [17:36<18:07, 2.57s/it] 38%|█████████████████████████████▋ | 259/681 [17:39<18:43, 2.66s/it] {'loss': 0.5311, 'grad_norm': 54.30337142944336, 'learning_rate': 3.9128410360564793e-07, 'margin_dpo/margin_mean': 23.94602394104004, 'margin_dpo/margin_std': 20.407352447509766, 'logps/chosen': -67.30290222167969, 'logps/rejected': -127.61224365234375, 'logps/ref_chosen': -52.828346252441406, 'logps/ref_rejected': -89.19165802001953, 'logits/chosen': -0.6342747211456299, 'logits/rejected': -0.6089296340942383, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.946025848388672, 'margin_dpo/beta_margin_mean': 2.3946025371551514, 'margin_dpo/beta_margin_std': 2.1142630577087402, 'margin_dpo/beta_margin_grad_mean': -0.1905156373977661, 'margin_dpo/beta_margin_grad_std': 0.20092925429344177, 'epoch': 0.38} + 38%|█████████████████████████████▋ | 259/681 [17:39<18:43, 2.66s/it] 38%|█████████████████████████████▊ | 260/681 [17:41<18:48, 2.68s/it] {'loss': 0.5057, 'grad_norm': 60.32538604736328, 'learning_rate': 3.9022350248844246e-07, 'margin_dpo/margin_mean': 27.156875610351562, 'margin_dpo/margin_std': 25.030288696289062, 'logps/chosen': -62.85065460205078, 'logps/rejected': -137.6796417236328, 'logps/ref_chosen': -47.41767501831055, 'logps/ref_rejected': -95.08979034423828, 'logits/chosen': -0.6234908103942871, 'logits/rejected': -0.6234794855117798, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.156875610351562, 'margin_dpo/beta_margin_mean': 2.7156875133514404, 'margin_dpo/beta_margin_std': 2.5070676803588867, 'margin_dpo/beta_margin_grad_mean': -0.18406428396701813, 'margin_dpo/beta_margin_grad_std': 0.20967774093151093, 'epoch': 0.38} + 38%|█████████████████████████████▊ | 260/681 [17:41<18:48, 2.68s/it] 38%|█████████████████████████████▉ | 261/681 [17:44<18:06, 2.59s/it] {'loss': 0.4748, 'grad_norm': 47.035186767578125, 'learning_rate': 3.891592063515376e-07, 'margin_dpo/margin_mean': 28.690322875976562, 'margin_dpo/margin_std': 26.378036499023438, 'logps/chosen': -65.26475524902344, 'logps/rejected': -129.43865966796875, 'logps/ref_chosen': -53.03137969970703, 'logps/ref_rejected': -88.51494598388672, 'logits/chosen': -0.6528719067573547, 'logits/rejected': -0.6170308589935303, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.690322875976562, 'margin_dpo/beta_margin_mean': 2.869032144546509, 'margin_dpo/beta_margin_std': 2.6597743034362793, 'margin_dpo/beta_margin_grad_mean': -0.17383930087089539, 'margin_dpo/beta_margin_grad_std': 0.20882652699947357, 'epoch': 0.38} + 38%|█████████████████████████████▉ | 261/681 [17:44<18:06, 2.59s/it] 38%|██████████████████████████████ | 262/681 [17:46<17:56, 2.57s/it] {'loss': 0.5286, 'grad_norm': 65.2990493774414, 'learning_rate': 3.880912432401264e-07, 'margin_dpo/margin_mean': 25.835269927978516, 'margin_dpo/margin_std': 21.91771697998047, 'logps/chosen': -74.31780242919922, 'logps/rejected': -126.95146179199219, 'logps/ref_chosen': -59.620140075683594, 'logps/ref_rejected': -86.41853332519531, 'logits/chosen': -0.6476384401321411, 'logits/rejected': -0.601101279258728, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.835268020629883, 'margin_dpo/beta_margin_mean': 2.583526849746704, 'margin_dpo/beta_margin_std': 2.1952381134033203, 'margin_dpo/beta_margin_grad_mean': -0.1703178882598877, 'margin_dpo/beta_margin_grad_std': 0.2238461673259735, 'epoch': 0.38} + 38%|██████████████████████████████ | 262/681 [17:46<17:56, 2.57s/it] 39%|██████████████████████████████ | 263/681 [17:49<18:24, 2.64s/it] {'loss': 0.4332, 'grad_norm': 63.93273162841797, 'learning_rate': 3.870196412960302e-07, 'margin_dpo/margin_mean': 30.601646423339844, 'margin_dpo/margin_std': 26.212867736816406, 'logps/chosen': -71.28265380859375, 'logps/rejected': -139.320556640625, 'logps/ref_chosen': -59.42094421386719, 'logps/ref_rejected': -96.85720825195312, 'logits/chosen': -0.6848942041397095, 'logits/rejected': -0.6289730072021484, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.601646423339844, 'margin_dpo/beta_margin_mean': 3.0601646900177, 'margin_dpo/beta_margin_std': 2.6217379570007324, 'margin_dpo/beta_margin_grad_mean': -0.16087834537029266, 'margin_dpo/beta_margin_grad_std': 0.1975705325603485, 'epoch': 0.39} + 39%|██████████████████████████████ | 263/681 [17:49<18:24, 2.64s/it] 39%|██████████████████████████████▏ | 264/681 [17:52<18:32, 2.67s/it] {'loss': 0.5449, 'grad_norm': 65.42985534667969, 'learning_rate': 3.8594442875695665e-07, 'margin_dpo/margin_mean': 24.097835540771484, 'margin_dpo/margin_std': 21.762907028198242, 'logps/chosen': -76.15332794189453, 'logps/rejected': -131.38528442382812, 'logps/ref_chosen': -62.722084045410156, 'logps/ref_rejected': -93.85621643066406, 'logits/chosen': -0.6409514546394348, 'logits/rejected': -0.6121193766593933, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.09783363342285, 'margin_dpo/beta_margin_mean': 2.409783363342285, 'margin_dpo/beta_margin_std': 2.2144694328308105, 'margin_dpo/beta_margin_grad_mean': -0.19213639199733734, 'margin_dpo/beta_margin_grad_std': 0.20564202964305878, 'epoch': 0.39} + 39%|██████████████████████████████▏ | 264/681 [17:52<18:32, 2.67s/it] 39%|██████████████████████████████▎ | 265/681 [17:54<18:26, 2.66s/it] {'loss': 0.5823, 'grad_norm': 73.85417938232422, 'learning_rate': 3.848656339557562e-07, 'margin_dpo/margin_mean': 25.292123794555664, 'margin_dpo/margin_std': 25.746461868286133, 'logps/chosen': -76.27545928955078, 'logps/rejected': -127.61671447753906, 'logps/ref_chosen': -61.971466064453125, 'logps/ref_rejected': -88.02059936523438, 'logits/chosen': -0.6545775532722473, 'logits/rejected': -0.6242020130157471, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.292123794555664, 'margin_dpo/beta_margin_mean': 2.529212474822998, 'margin_dpo/beta_margin_std': 2.583730459213257, 'margin_dpo/beta_margin_grad_mean': -0.20656327903270721, 'margin_dpo/beta_margin_grad_std': 0.2223885953426361, 'epoch': 0.39} + 39%|██████████████████████████████▎ | 265/681 [17:54<18:26, 2.66s/it] 39%|██████████████████████████████▍ | 266/681 [17:57<18:06, 2.62s/it] {'loss': 0.5648, 'grad_norm': 57.55160903930664, 'learning_rate': 3.8378328531967507e-07, 'margin_dpo/margin_mean': 24.757904052734375, 'margin_dpo/margin_std': 22.62070083618164, 'logps/chosen': -80.7601547241211, 'logps/rejected': -106.38961791992188, 'logps/ref_chosen': -67.09967041015625, 'logps/ref_rejected': -67.97122192382812, 'logits/chosen': -0.6736335754394531, 'logits/rejected': -0.6081231832504272, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.757902145385742, 'margin_dpo/beta_margin_mean': 2.47579026222229, 'margin_dpo/beta_margin_std': 2.267148017883301, 'margin_dpo/beta_margin_grad_mean': -0.20071399211883545, 'margin_dpo/beta_margin_grad_std': 0.22172965109348297, 'epoch': 0.39} + 39%|██████████████████████████████▍ | 266/681 [17:57<18:06, 2.62s/it] 39%|██████████████████████████████▌ | 267/681 [18:00<18:01, 2.61s/it] {'loss': 0.4124, 'grad_norm': 53.11775588989258, 'learning_rate': 3.8269741136960646e-07, 'margin_dpo/margin_mean': 27.410789489746094, 'margin_dpo/margin_std': 22.08306884765625, 'logps/chosen': -82.08721923828125, 'logps/rejected': -130.69570922851562, 'logps/ref_chosen': -68.97074890136719, 'logps/ref_rejected': -90.16844940185547, 'logits/chosen': -0.6374738216400146, 'logits/rejected': -0.5902992486953735, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.410789489746094, 'margin_dpo/beta_margin_mean': 2.7410788536071777, 'margin_dpo/beta_margin_std': 2.210850954055786, 'margin_dpo/beta_margin_grad_mean': -0.15888135135173798, 'margin_dpo/beta_margin_grad_std': 0.18455727398395538, 'epoch': 0.39} + 39%|██████████████████████████████▌ | 267/681 [18:00<18:01, 2.61s/it] 39%|██████████████████████████████▋ | 268/681 [18:02<18:18, 2.66s/it] {'loss': 0.4971, 'grad_norm': 62.39994812011719, 'learning_rate': 3.8160804071933894e-07, 'margin_dpo/margin_mean': 25.404464721679688, 'margin_dpo/margin_std': 21.592742919921875, 'logps/chosen': -68.46856689453125, 'logps/rejected': -139.620361328125, 'logps/ref_chosen': -55.900306701660156, 'logps/ref_rejected': -101.64763641357422, 'logits/chosen': -0.6283696293830872, 'logits/rejected': -0.6117571592330933, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.404462814331055, 'margin_dpo/beta_margin_mean': 2.5404462814331055, 'margin_dpo/beta_margin_std': 2.1823596954345703, 'margin_dpo/beta_margin_grad_mean': -0.1794806867837906, 'margin_dpo/beta_margin_grad_std': 0.21003910899162292, 'epoch': 0.39} + 39%|██████████████████████████████▋ | 268/681 [18:02<18:18, 2.66s/it] 40%|██████████████████████████████▊ | 269/681 [18:05<17:54, 2.61s/it] {'loss': 0.4067, 'grad_norm': 53.5538330078125, 'learning_rate': 3.8051520207480204e-07, 'margin_dpo/margin_mean': 32.81420135498047, 'margin_dpo/margin_std': 23.582063674926758, 'logps/chosen': -82.96507263183594, 'logps/rejected': -153.08908081054688, 'logps/ref_chosen': -70.03955078125, 'logps/ref_rejected': -107.34937286376953, 'logits/chosen': -0.6579411029815674, 'logits/rejected': -0.6127967238426208, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.81420135498047, 'margin_dpo/beta_margin_mean': 3.2814202308654785, 'margin_dpo/beta_margin_std': 2.366851329803467, 'margin_dpo/beta_margin_grad_mean': -0.14487290382385254, 'margin_dpo/beta_margin_grad_std': 0.21411198377609253, 'epoch': 0.4} + 40%|██████████████████████████████▊ | 269/681 [18:05<17:54, 2.61s/it] 40%|██████████████████████████████▉ | 270/681 [18:07<18:06, 2.64s/it] {'loss': 0.5061, 'grad_norm': 41.36155319213867, 'learning_rate': 3.794189242333106e-07, 'margin_dpo/margin_mean': 25.06671142578125, 'margin_dpo/margin_std': 22.077110290527344, 'logps/chosen': -80.42156219482422, 'logps/rejected': -145.8834228515625, 'logps/ref_chosen': -69.53347778320312, 'logps/ref_rejected': -109.92864990234375, 'logits/chosen': -0.6722906827926636, 'logits/rejected': -0.6524355411529541, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.06671142578125, 'margin_dpo/beta_margin_mean': 2.506671190261841, 'margin_dpo/beta_margin_std': 2.224198341369629, 'margin_dpo/beta_margin_grad_mean': -0.18635544180870056, 'margin_dpo/beta_margin_grad_std': 0.20361123979091644, 'epoch': 0.4} + 40%|██████████████████████████████▉ | 270/681 [18:08<18:06, 2.64s/it] 40%|███████████████████████████████ | 271/681 [18:10<17:40, 2.59s/it] {'loss': 0.534, 'grad_norm': 51.65666961669922, 'learning_rate': 3.7831923608280514e-07, 'margin_dpo/margin_mean': 25.931888580322266, 'margin_dpo/margin_std': 23.307266235351562, 'logps/chosen': -71.0101318359375, 'logps/rejected': -132.6912841796875, 'logps/ref_chosen': -56.76457214355469, 'logps/ref_rejected': -92.51383209228516, 'logits/chosen': -0.6164276599884033, 'logits/rejected': -0.5750702619552612, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.9318904876709, 'margin_dpo/beta_margin_mean': 2.593189001083374, 'margin_dpo/beta_margin_std': 2.331104040145874, 'margin_dpo/beta_margin_grad_mean': -0.18699227273464203, 'margin_dpo/beta_margin_grad_std': 0.21381914615631104, 'epoch': 0.4} + 40%|███████████████████████████████ | 271/681 [18:10<17:40, 2.59s/it] 40%|███████████████████████████████▏ | 272/681 [18:13<18:01, 2.64s/it] {'loss': 0.5359, 'grad_norm': 52.00728225708008, 'learning_rate': 3.772161666010912e-07, 'margin_dpo/margin_mean': 31.5368595123291, 'margin_dpo/margin_std': 27.00173568725586, 'logps/chosen': -62.51170349121094, 'logps/rejected': -150.09420776367188, 'logps/ref_chosen': -49.49715805053711, 'logps/ref_rejected': -105.54279327392578, 'logits/chosen': -0.6098858714103699, 'logits/rejected': -0.5980672836303711, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.5368595123291, 'margin_dpo/beta_margin_mean': 3.153686046600342, 'margin_dpo/beta_margin_std': 2.745887041091919, 'margin_dpo/beta_margin_grad_mean': -0.17077092826366425, 'margin_dpo/beta_margin_grad_std': 0.23799988627433777, 'epoch': 0.4} + 40%|███████████████████████████████▏ | 272/681 [18:13<18:01, 2.64s/it] 40%|███████████████████████████████▎ | 273/681 [18:15<17:28, 2.57s/it] {'loss': 0.4627, 'grad_norm': 59.0302848815918, 'learning_rate': 3.761097448550755e-07, 'margin_dpo/margin_mean': 30.293479919433594, 'margin_dpo/margin_std': 24.974472045898438, 'logps/chosen': -77.9120864868164, 'logps/rejected': -137.728759765625, 'logps/ref_chosen': -62.97539520263672, 'logps/ref_rejected': -92.49858093261719, 'logits/chosen': -0.5825521945953369, 'logits/rejected': -0.5468716025352478, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.29347801208496, 'margin_dpo/beta_margin_mean': 3.0293478965759277, 'margin_dpo/beta_margin_std': 2.5976314544677734, 'margin_dpo/beta_margin_grad_mean': -0.16412891447544098, 'margin_dpo/beta_margin_grad_std': 0.2039998471736908, 'epoch': 0.4} + 40%|███████████████████████████████▎ | 273/681 [18:15<17:28, 2.57s/it] 40%|███████████████████████████████▍ | 274/681 [18:18<17:41, 2.61s/it] {'loss': 0.5193, 'grad_norm': 55.06562423706055, 'learning_rate': 3.75e-07, 'margin_dpo/margin_mean': 26.310407638549805, 'margin_dpo/margin_std': 23.162757873535156, 'logps/chosen': -71.956298828125, 'logps/rejected': -119.93206787109375, 'logps/ref_chosen': -55.66770935058594, 'logps/ref_rejected': -77.33308410644531, 'logits/chosen': -0.6257538199424744, 'logits/rejected': -0.5888440608978271, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.310407638549805, 'margin_dpo/beta_margin_mean': 2.6310408115386963, 'margin_dpo/beta_margin_std': 2.3380789756774902, 'margin_dpo/beta_margin_grad_mean': -0.1804315745830536, 'margin_dpo/beta_margin_grad_std': 0.2112412303686142, 'epoch': 0.4} + 40%|███████████████████████████████▍ | 274/681 [18:18<17:41, 2.61s/it] 40%|███████████████████████████████▍ | 275/681 [18:21<18:25, 2.72s/it] {'loss': 0.4719, 'grad_norm': 64.80329895019531, 'learning_rate': 3.738869612786737e-07, 'margin_dpo/margin_mean': 27.70269775390625, 'margin_dpo/margin_std': 24.364110946655273, 'logps/chosen': -60.017059326171875, 'logps/rejected': -132.4287567138672, 'logps/ref_chosen': -48.594703674316406, 'logps/ref_rejected': -93.30369567871094, 'logits/chosen': -0.657637894153595, 'logits/rejected': -0.6397134065628052, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.70269775390625, 'margin_dpo/beta_margin_mean': 2.7702696323394775, 'margin_dpo/beta_margin_std': 2.443610906600952, 'margin_dpo/beta_margin_grad_mean': -0.17478503286838531, 'margin_dpo/beta_margin_grad_std': 0.20220105350017548, 'epoch': 0.4} + 40%|███████████████████████████████▍ | 275/681 [18:21<18:25, 2.72s/it] 41%|███████████████████████████████▌ | 276/681 [18:24<18:23, 2.73s/it] {'loss': 0.5956, 'grad_norm': 62.004940032958984, 'learning_rate': 3.7277065802070204e-07, 'margin_dpo/margin_mean': 25.469280242919922, 'margin_dpo/margin_std': 24.609455108642578, 'logps/chosen': -70.30280303955078, 'logps/rejected': -109.56034851074219, 'logps/ref_chosen': -56.57740783691406, 'logps/ref_rejected': -70.36566925048828, 'logits/chosen': -0.6588333249092102, 'logits/rejected': -0.6178128719329834, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.469282150268555, 'margin_dpo/beta_margin_mean': 2.5469281673431396, 'margin_dpo/beta_margin_std': 2.4726295471191406, 'margin_dpo/beta_margin_grad_mean': -0.21022818982601166, 'margin_dpo/beta_margin_grad_std': 0.23304350674152374, 'epoch': 0.41} + 41%|███████████████████████████████▌ | 276/681 [18:24<18:23, 2.73s/it] 41%|███████████████████████████████▋ | 277/681 [18:26<17:28, 2.60s/it] {'loss': 0.4262, 'grad_norm': 41.01213836669922, 'learning_rate': 3.71651119641714e-07, 'margin_dpo/margin_mean': 24.34532928466797, 'margin_dpo/margin_std': 18.586042404174805, 'logps/chosen': -68.6185302734375, 'logps/rejected': -129.5735626220703, 'logps/ref_chosen': -56.27156066894531, 'logps/ref_rejected': -92.88127136230469, 'logits/chosen': -0.6487230658531189, 'logits/rejected': -0.6121164560317993, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.345327377319336, 'margin_dpo/beta_margin_mean': 2.434532880783081, 'margin_dpo/beta_margin_std': 1.9181517362594604, 'margin_dpo/beta_margin_grad_mean': -0.17126400768756866, 'margin_dpo/beta_margin_grad_std': 0.16744239628314972, 'epoch': 0.41} + 41%|███████████████████████████████▋ | 277/681 [18:26<17:28, 2.60s/it] 41%|███████████████████████████████▊ | 278/681 [18:28<17:22, 2.59s/it] {'loss': 0.482, 'grad_norm': 45.832942962646484, 'learning_rate': 3.705283756425872e-07, 'margin_dpo/margin_mean': 29.894882202148438, 'margin_dpo/margin_std': 26.599227905273438, 'logps/chosen': -64.33135986328125, 'logps/rejected': -132.53787231445312, 'logps/ref_chosen': -52.94194030761719, 'logps/ref_rejected': -91.25357818603516, 'logits/chosen': -0.6533815860748291, 'logits/rejected': -0.6426759958267212, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.894880294799805, 'margin_dpo/beta_margin_mean': 2.989488124847412, 'margin_dpo/beta_margin_std': 2.6637423038482666, 'margin_dpo/beta_margin_grad_mean': -0.17872436344623566, 'margin_dpo/beta_margin_grad_std': 0.21186299622058868, 'epoch': 0.41} + 41%|███████████████████████████████▊ | 278/681 [18:28<17:22, 2.59s/it] 41%|███████████████████████████████▉ | 279/681 [18:31<17:16, 2.58s/it] {'loss': 0.488, 'grad_norm': 55.28075408935547, 'learning_rate': 3.6940245560867e-07, 'margin_dpo/margin_mean': 29.428203582763672, 'margin_dpo/margin_std': 24.257051467895508, 'logps/chosen': -60.90464782714844, 'logps/rejected': -129.54296875, 'logps/ref_chosen': -48.641319274902344, 'logps/ref_rejected': -87.8514404296875, 'logits/chosen': -0.6621390581130981, 'logits/rejected': -0.6348008513450623, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.428205490112305, 'margin_dpo/beta_margin_mean': 2.9428205490112305, 'margin_dpo/beta_margin_std': 2.446150302886963, 'margin_dpo/beta_margin_grad_mean': -0.17444436252117157, 'margin_dpo/beta_margin_grad_std': 0.2196406126022339, 'epoch': 0.41} + 41%|███████████████████████████████▉ | 279/681 [18:31<17:16, 2.58s/it] 41%|████████████████████████████████ | 280/681 [18:34<17:23, 2.60s/it] {'loss': 0.3483, 'grad_norm': 38.125099182128906, 'learning_rate': 3.6827338920900253e-07, 'margin_dpo/margin_mean': 28.934520721435547, 'margin_dpo/margin_std': 18.611377716064453, 'logps/chosen': -72.28899383544922, 'logps/rejected': -141.04525756835938, 'logps/ref_chosen': -58.797122955322266, 'logps/ref_rejected': -98.61885070800781, 'logits/chosen': -0.6191302537918091, 'logits/rejected': -0.6008737683296204, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.934520721435547, 'margin_dpo/beta_margin_mean': 2.8934521675109863, 'margin_dpo/beta_margin_std': 1.8741562366485596, 'margin_dpo/beta_margin_grad_mean': -0.133779838681221, 'margin_dpo/beta_margin_grad_std': 0.18086881935596466, 'epoch': 0.41} + 41%|████████████████████████████████ | 280/681 [18:34<17:23, 2.60s/it] 41%|████████████████████████████████▏ | 281/681 [18:36<17:46, 2.67s/it] {'loss': 0.483, 'grad_norm': 64.53363037109375, 'learning_rate': 3.6714120619553435e-07, 'margin_dpo/margin_mean': 25.293540954589844, 'margin_dpo/margin_std': 20.025854110717773, 'logps/chosen': -67.85418701171875, 'logps/rejected': -118.54179382324219, 'logps/ref_chosen': -55.488521575927734, 'logps/ref_rejected': -80.88258361816406, 'logits/chosen': -0.665095329284668, 'logits/rejected': -0.62502121925354, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.293540954589844, 'margin_dpo/beta_margin_mean': 2.5293540954589844, 'margin_dpo/beta_margin_std': 2.0211899280548096, 'margin_dpo/beta_margin_grad_mean': -0.15846484899520874, 'margin_dpo/beta_margin_grad_std': 0.1876082420349121, 'epoch': 0.41} + 41%|████████████████████████████████▏ | 281/681 [18:36<17:46, 2.67s/it] 41%|████████████████████████████████▎ | 282/681 [18:39<17:26, 2.62s/it] {'loss': 0.475, 'grad_norm': 50.1074333190918, 'learning_rate': 3.660059364023408e-07, 'margin_dpo/margin_mean': 23.41071128845215, 'margin_dpo/margin_std': 20.858131408691406, 'logps/chosen': -85.81141662597656, 'logps/rejected': -131.50296020507812, 'logps/ref_chosen': -73.07014465332031, 'logps/ref_rejected': -95.35098266601562, 'logits/chosen': -0.6407305002212524, 'logits/rejected': -0.593590497970581, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.41071128845215, 'margin_dpo/beta_margin_mean': 2.341071128845215, 'margin_dpo/beta_margin_std': 2.0938100814819336, 'margin_dpo/beta_margin_grad_mean': -0.18391168117523193, 'margin_dpo/beta_margin_grad_std': 0.18578048050403595, 'epoch': 0.41} + 41%|████████████████████████████████▎ | 282/681 [18:39<17:26, 2.62s/it] 42%|████████████████████████████████▍ | 283/681 [18:42<17:21, 2.62s/it] {'loss': 0.4753, 'grad_norm': 48.29468536376953, 'learning_rate': 3.6486760974483685e-07, 'margin_dpo/margin_mean': 28.113353729248047, 'margin_dpo/margin_std': 23.463539123535156, 'logps/chosen': -74.30840301513672, 'logps/rejected': -137.50985717773438, 'logps/ref_chosen': -61.89844512939453, 'logps/ref_rejected': -96.98655700683594, 'logits/chosen': -0.6420848369598389, 'logits/rejected': -0.6138025522232056, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.113353729248047, 'margin_dpo/beta_margin_mean': 2.811335563659668, 'margin_dpo/beta_margin_std': 2.354823589324951, 'margin_dpo/beta_margin_grad_mean': -0.1640281230211258, 'margin_dpo/beta_margin_grad_std': 0.21006377041339874, 'epoch': 0.42} + 42%|████████████████████████████████▍ | 283/681 [18:42<17:21, 2.62s/it] 42%|████████████████████████████████▌ | 284/681 [18:44<17:39, 2.67s/it] {'loss': 0.4108, 'grad_norm': 43.3529167175293, 'learning_rate': 3.6372625621898863e-07, 'margin_dpo/margin_mean': 29.832664489746094, 'margin_dpo/margin_std': 25.724153518676758, 'logps/chosen': -72.13871765136719, 'logps/rejected': -137.00511169433594, 'logps/ref_chosen': -58.4355354309082, 'logps/ref_rejected': -93.46926879882812, 'logits/chosen': -0.6275640726089478, 'logits/rejected': -0.6143908500671387, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.832664489746094, 'margin_dpo/beta_margin_mean': 2.983266592025757, 'margin_dpo/beta_margin_std': 2.578672170639038, 'margin_dpo/beta_margin_grad_mean': -0.15540792047977448, 'margin_dpo/beta_margin_grad_std': 0.1845344454050064, 'epoch': 0.42} + 42%|████████████████████████████████▌ | 284/681 [18:44<17:39, 2.67s/it] 42%|████████████████████████████████▋ | 285/681 [18:47<17:23, 2.64s/it] {'loss': 0.4257, 'grad_norm': 57.101165771484375, 'learning_rate': 3.625819059005228e-07, 'margin_dpo/margin_mean': 26.457977294921875, 'margin_dpo/margin_std': 20.855016708374023, 'logps/chosen': -81.82306671142578, 'logps/rejected': -141.17568969726562, 'logps/ref_chosen': -66.2322006225586, 'logps/ref_rejected': -99.1268310546875, 'logits/chosen': -0.6859316825866699, 'logits/rejected': -0.6596359014511108, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.457977294921875, 'margin_dpo/beta_margin_mean': 2.6457977294921875, 'margin_dpo/beta_margin_std': 2.1147918701171875, 'margin_dpo/beta_margin_grad_mean': -0.16400812566280365, 'margin_dpo/beta_margin_grad_std': 0.18523728847503662, 'epoch': 0.42} + 42%|████████████████████████████████▋ | 285/681 [18:47<17:23, 2.64s/it] 42%|████████████████████████████████▊ | 286/681 [18:50<17:24, 2.65s/it] {'loss': 0.5505, 'grad_norm': 58.981807708740234, 'learning_rate': 3.614345889441346e-07, 'margin_dpo/margin_mean': 27.725364685058594, 'margin_dpo/margin_std': 25.239097595214844, 'logps/chosen': -86.8876724243164, 'logps/rejected': -130.25048828125, 'logps/ref_chosen': -72.95100402832031, 'logps/ref_rejected': -88.58845520019531, 'logits/chosen': -0.6508222222328186, 'logits/rejected': -0.6174975633621216, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.725364685058594, 'margin_dpo/beta_margin_mean': 2.772536516189575, 'margin_dpo/beta_margin_std': 2.563308000564575, 'margin_dpo/beta_margin_grad_mean': -0.18501420319080353, 'margin_dpo/beta_margin_grad_std': 0.22696195542812347, 'epoch': 0.42} + 42%|████████████████████████████████▊ | 286/681 [18:50<17:24, 2.65s/it] 42%|████████████████████████████████▊ | 287/681 [18:52<16:38, 2.53s/it] {'loss': 0.534, 'grad_norm': 52.582481384277344, 'learning_rate': 3.6028433558269275e-07, 'margin_dpo/margin_mean': 26.86972427368164, 'margin_dpo/margin_std': 25.95490264892578, 'logps/chosen': -75.86917114257812, 'logps/rejected': -118.89381408691406, 'logps/ref_chosen': -61.54115295410156, 'logps/ref_rejected': -77.6960678100586, 'logits/chosen': -0.658734142780304, 'logits/rejected': -0.6133627891540527, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.869726181030273, 'margin_dpo/beta_margin_mean': 2.6869726181030273, 'margin_dpo/beta_margin_std': 2.597897529602051, 'margin_dpo/beta_margin_grad_mean': -0.1924961507320404, 'margin_dpo/beta_margin_grad_std': 0.21066516637802124, 'epoch': 0.42} + 42%|████████████████████████████████▊ | 287/681 [18:52<16:38, 2.53s/it] 42%|████████████████████████████████▉ | 288/681 [18:55<17:33, 2.68s/it] {'loss': 0.4303, 'grad_norm': 57.62872314453125, 'learning_rate': 3.5913117612644327e-07, 'margin_dpo/margin_mean': 27.80404281616211, 'margin_dpo/margin_std': 21.125944137573242, 'logps/chosen': -72.6466293334961, 'logps/rejected': -131.12515258789062, 'logps/ref_chosen': -56.661224365234375, 'logps/ref_rejected': -87.335693359375, 'logits/chosen': -0.634566605091095, 'logits/rejected': -0.6029102206230164, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.80404281616211, 'margin_dpo/beta_margin_mean': 2.7804043292999268, 'margin_dpo/beta_margin_std': 2.1588449478149414, 'margin_dpo/beta_margin_grad_mean': -0.16040383279323578, 'margin_dpo/beta_margin_grad_std': 0.19727593660354614, 'epoch': 0.42} + 42%|████████████████████████████████▉ | 288/681 [18:55<17:33, 2.68s/it] 42%|█████████████████████████████████ | 289/681 [18:57<17:09, 2.63s/it] {'loss': 0.5004, 'grad_norm': 50.83492660522461, 'learning_rate': 3.5797514096221024e-07, 'margin_dpo/margin_mean': 30.28182601928711, 'margin_dpo/margin_std': 29.03339958190918, 'logps/chosen': -61.59012985229492, 'logps/rejected': -134.28424072265625, 'logps/ref_chosen': -45.23039245605469, 'logps/ref_rejected': -87.64266967773438, 'logits/chosen': -0.6417437791824341, 'logits/rejected': -0.6304539442062378, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.28182601928711, 'margin_dpo/beta_margin_mean': 3.0281827449798584, 'margin_dpo/beta_margin_std': 2.936239004135132, 'margin_dpo/beta_margin_grad_mean': -0.18503758311271667, 'margin_dpo/beta_margin_grad_std': 0.2104889303445816, 'epoch': 0.42} + 42%|█████████████████████████████████ | 289/681 [18:57<17:09, 2.63s/it] 43%|█████████████████████████████████▏ | 290/681 [19:00<17:08, 2.63s/it] {'loss': 0.5027, 'grad_norm': 63.28836441040039, 'learning_rate': 3.568162605525952e-07, 'margin_dpo/margin_mean': 31.53160858154297, 'margin_dpo/margin_std': 29.308597564697266, 'logps/chosen': -72.06575775146484, 'logps/rejected': -164.83444213867188, 'logps/ref_chosen': -55.47149658203125, 'logps/ref_rejected': -116.70857238769531, 'logits/chosen': -0.5944575071334839, 'logits/rejected': -0.5908774137496948, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.53160858154297, 'margin_dpo/beta_margin_mean': 3.15316104888916, 'margin_dpo/beta_margin_std': 2.957723617553711, 'margin_dpo/beta_margin_grad_mean': -0.17223544418811798, 'margin_dpo/beta_margin_grad_std': 0.22271078824996948, 'epoch': 0.43} + 43%|█████████████████████████████████▏ | 290/681 [19:00<17:08, 2.63s/it] 43%|█████████████████████████████████▎ | 291/681 [19:03<17:01, 2.62s/it] {'loss': 0.4813, 'grad_norm': 56.67517852783203, 'learning_rate': 3.5565456543517485e-07, 'margin_dpo/margin_mean': 27.70128059387207, 'margin_dpo/margin_std': 22.738750457763672, 'logps/chosen': -76.0269775390625, 'logps/rejected': -129.76498413085938, 'logps/ref_chosen': -63.26036834716797, 'logps/ref_rejected': -89.29708862304688, 'logits/chosen': -0.6302033066749573, 'logits/rejected': -0.595551609992981, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.701282501220703, 'margin_dpo/beta_margin_mean': 2.7701282501220703, 'margin_dpo/beta_margin_std': 2.2966012954711914, 'margin_dpo/beta_margin_grad_mean': -0.16477952897548676, 'margin_dpo/beta_margin_grad_std': 0.20427057147026062, 'epoch': 0.43} + 43%|█████████████████████████████████▎ | 291/681 [19:03<17:01, 2.62s/it] 43%|█████████████████████████████████▍ | 292/681 [19:05<16:32, 2.55s/it] {'loss': 0.3934, 'grad_norm': 54.23537063598633, 'learning_rate': 3.5449008622169583e-07, 'margin_dpo/margin_mean': 29.846210479736328, 'margin_dpo/margin_std': 24.429065704345703, 'logps/chosen': -70.70861053466797, 'logps/rejected': -136.59767150878906, 'logps/ref_chosen': -53.91852951049805, 'logps/ref_rejected': -89.96138000488281, 'logits/chosen': -0.6187624931335449, 'logits/rejected': -0.5753225684165955, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.846208572387695, 'margin_dpo/beta_margin_mean': 2.9846208095550537, 'margin_dpo/beta_margin_std': 2.461369752883911, 'margin_dpo/beta_margin_grad_mean': -0.15516723692417145, 'margin_dpo/beta_margin_grad_std': 0.1758795827627182, 'epoch': 0.43} + 43%|█████████████████████████████████▍ | 292/681 [19:05<16:32, 2.55s/it] 43%|█████████████████████████████████▌ | 293/681 [19:08<16:44, 2.59s/it] {'loss': 0.5966, 'grad_norm': 52.83697509765625, 'learning_rate': 3.5332285359726846e-07, 'margin_dpo/margin_mean': 24.17245864868164, 'margin_dpo/margin_std': 24.42681121826172, 'logps/chosen': -76.67402648925781, 'logps/rejected': -118.3228988647461, 'logps/ref_chosen': -60.376033782958984, 'logps/ref_rejected': -77.8524398803711, 'logits/chosen': -0.6384230852127075, 'logits/rejected': -0.6081752777099609, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.17245864868164, 'margin_dpo/beta_margin_mean': 2.417245864868164, 'margin_dpo/beta_margin_std': 2.448225259780884, 'margin_dpo/beta_margin_grad_mean': -0.2109437733888626, 'margin_dpo/beta_margin_grad_std': 0.217354878783226, 'epoch': 0.43} + 43%|█████████████████████████████████▌ | 293/681 [19:08<16:44, 2.59s/it] 43%|█████████████████████████████████▋ | 294/681 [19:10<17:02, 2.64s/it] {'loss': 0.5153, 'grad_norm': 42.41814041137695, 'learning_rate': 3.5215289831955786e-07, 'margin_dpo/margin_mean': 27.206314086914062, 'margin_dpo/margin_std': 25.83649444580078, 'logps/chosen': -62.738616943359375, 'logps/rejected': -123.75438690185547, 'logps/ref_chosen': -48.0875358581543, 'logps/ref_rejected': -81.89698791503906, 'logits/chosen': -0.6331781148910522, 'logits/rejected': -0.6203632354736328, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.206314086914062, 'margin_dpo/beta_margin_mean': 2.7206313610076904, 'margin_dpo/beta_margin_std': 2.606935977935791, 'margin_dpo/beta_margin_grad_mean': -0.1880524605512619, 'margin_dpo/beta_margin_grad_std': 0.2148909568786621, 'epoch': 0.43} + 43%|█████████████████████████████████▋ | 294/681 [19:10<17:02, 2.64s/it] 43%|█████████████████████████████████▊ | 295/681 [19:13<16:34, 2.58s/it] {'loss': 0.5905, 'grad_norm': 63.754703521728516, 'learning_rate': 3.509802512179737e-07, 'margin_dpo/margin_mean': 27.146446228027344, 'margin_dpo/margin_std': 24.949363708496094, 'logps/chosen': -68.84889221191406, 'logps/rejected': -133.5269775390625, 'logps/ref_chosen': -49.92467498779297, 'logps/ref_rejected': -87.45632934570312, 'logits/chosen': -0.6102343797683716, 'logits/rejected': -0.6015244722366333, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.14644432067871, 'margin_dpo/beta_margin_mean': 2.714644432067871, 'margin_dpo/beta_margin_std': 2.515841245651245, 'margin_dpo/beta_margin_grad_mean': -0.18437956273555756, 'margin_dpo/beta_margin_grad_std': 0.22511690855026245, 'epoch': 0.43} + 43%|█████████████████████████████████▊ | 295/681 [19:13<16:34, 2.58s/it] 43%|█████████████████████████████████▉ | 296/681 [19:15<16:16, 2.54s/it] {'loss': 0.7362, 'grad_norm': 79.74415588378906, 'learning_rate': 3.498049431928577e-07, 'margin_dpo/margin_mean': 23.47943878173828, 'margin_dpo/margin_std': 26.526391983032227, 'logps/chosen': -84.0577392578125, 'logps/rejected': -135.13502502441406, 'logps/ref_chosen': -65.49124145507812, 'logps/ref_rejected': -93.08908081054688, 'logits/chosen': -0.6979824304580688, 'logits/rejected': -0.6591476202011108, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 23.47943878173828, 'margin_dpo/beta_margin_mean': 2.3479440212249756, 'margin_dpo/beta_margin_std': 2.6585099697113037, 'margin_dpo/beta_margin_grad_mean': -0.23561769723892212, 'margin_dpo/beta_margin_grad_std': 0.25472357869148254, 'epoch': 0.43} + 43%|█████████████████████████████████▉ | 296/681 [19:15<16:16, 2.54s/it] 44%|██████████████████████████████████ | 297/681 [19:18<16:32, 2.58s/it] {'loss': 0.426, 'grad_norm': 44.74517059326172, 'learning_rate': 3.486270052146694e-07, 'margin_dpo/margin_mean': 28.571863174438477, 'margin_dpo/margin_std': 23.766578674316406, 'logps/chosen': -74.85836029052734, 'logps/rejected': -142.09182739257812, 'logps/ref_chosen': -56.47694778442383, 'logps/ref_rejected': -95.1385498046875, 'logits/chosen': -0.5774829387664795, 'logits/rejected': -0.5429031848907471, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.571861267089844, 'margin_dpo/beta_margin_mean': 2.8571863174438477, 'margin_dpo/beta_margin_std': 2.378805160522461, 'margin_dpo/beta_margin_grad_mean': -0.16123466193675995, 'margin_dpo/beta_margin_grad_std': 0.1913672685623169, 'epoch': 0.44} + 44%|██████████████████████████████████ | 297/681 [19:18<16:32, 2.58s/it] 44%|██████████████████████████████████▏ | 298/681 [19:21<17:10, 2.69s/it] {'loss': 0.4135, 'grad_norm': 44.202003479003906, 'learning_rate': 3.474464683231698e-07, 'margin_dpo/margin_mean': 29.80324935913086, 'margin_dpo/margin_std': 26.537506103515625, 'logps/chosen': -83.96099090576172, 'logps/rejected': -163.1012420654297, 'logps/ref_chosen': -67.32516479492188, 'logps/ref_rejected': -116.66217041015625, 'logits/chosen': -0.6306143999099731, 'logits/rejected': -0.6259936690330505, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.803251266479492, 'margin_dpo/beta_margin_mean': 2.9803249835968018, 'margin_dpo/beta_margin_std': 2.6625173091888428, 'margin_dpo/beta_margin_grad_mean': -0.1614616960287094, 'margin_dpo/beta_margin_grad_std': 0.180389866232872, 'epoch': 0.44} + 44%|██████████████████████████████████▏ | 298/681 [19:21<17:10, 2.69s/it] 44%|██████████████████████████████████▏ | 299/681 [19:24<16:56, 2.66s/it] {'loss': 0.5069, 'grad_norm': 59.32780075073242, 'learning_rate': 3.462633636266041e-07, 'margin_dpo/margin_mean': 31.181495666503906, 'margin_dpo/margin_std': 27.928592681884766, 'logps/chosen': -64.30989837646484, 'logps/rejected': -130.85752868652344, 'logps/ref_chosen': -48.96209716796875, 'logps/ref_rejected': -84.32823944091797, 'logits/chosen': -0.5633834600448608, 'logits/rejected': -0.5420501232147217, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.181493759155273, 'margin_dpo/beta_margin_mean': 3.118149518966675, 'margin_dpo/beta_margin_std': 2.880525588989258, 'margin_dpo/beta_margin_grad_mean': -0.17790299654006958, 'margin_dpo/beta_margin_grad_std': 0.22678081691265106, 'epoch': 0.44} + 44%|██████████████████████████████████▏ | 299/681 [19:24<16:56, 2.66s/it] 44%|██████████████████████████████████▎ | 300/681 [19:27<17:34, 2.77s/it] {'loss': 0.7096, 'grad_norm': 81.78619384765625, 'learning_rate': 3.4507772230088147e-07, 'margin_dpo/margin_mean': 29.621217727661133, 'margin_dpo/margin_std': 29.81679344177246, 'logps/chosen': -80.5472183227539, 'logps/rejected': -147.06117248535156, 'logps/ref_chosen': -59.073707580566406, 'logps/ref_rejected': -95.9664535522461, 'logits/chosen': -0.6105576157569885, 'logits/rejected': -0.591549277305603, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.6212158203125, 'margin_dpo/beta_margin_mean': 2.9621217250823975, 'margin_dpo/beta_margin_std': 2.987994909286499, 'margin_dpo/beta_margin_grad_mean': -0.20562496781349182, 'margin_dpo/beta_margin_grad_std': 0.27093952894210815, 'epoch': 0.44} + 44%|██████████████████████████████████▎ | 300/681 [19:27<17:34, 2.77s/it][INFO|trainer.py:4307] 2026-04-17 21:45:57,467 >> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 21:45:57,467 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 21:45:57,467 >> Batch size = 8 + + 0%| | 0/73 [00:00> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 21:51:01,212 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 21:51:01,212 >> Batch size = 8 + + 0%| | 0/73 [00:00> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400 +[INFO|configuration_utils.py:419] 2026-04-17 21:51:57,594 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400/config.json +[INFO|configuration_utils.py:911] 2026-04-17 21:51:57,601 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-17 21:52:50,158 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-17 21:52:50,168 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-17 21:52:50,173 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400/special_tokens_map.json + 59%|████████████████████████████████████████████▏ | 401/681 [30:09<8:02:03, 103.30s/it] {'loss': 0.659, 'grad_norm': 61.670928955078125, 'learning_rate': 2.1800473436235136e-07, 'margin_dpo/margin_mean': 29.519912719726562, 'margin_dpo/margin_std': 31.590171813964844, 'logps/chosen': -76.15703582763672, 'logps/rejected': -132.30641174316406, 'logps/ref_chosen': -57.16303253173828, 'logps/ref_rejected': -83.79249572753906, 'logits/chosen': -0.5769657492637634, 'logits/rejected': -0.5573090314865112, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.519914627075195, 'margin_dpo/beta_margin_mean': 2.951991558074951, 'margin_dpo/beta_margin_std': 3.1654133796691895, 'margin_dpo/beta_margin_grad_mean': -0.2077464908361435, 'margin_dpo/beta_margin_grad_std': 0.25239863991737366, 'epoch': 0.59} + 59%|████████████████████████████████████████████▏ | 401/681 [30:09<8:02:03, 103.30s/it] 59%|████████████████████████████████████████████▊ | 402/681 [30:11<5:39:28, 73.00s/it] {'loss': 0.2132, 'grad_norm': 26.211894989013672, 'learning_rate': 2.1673238449588665e-07, 'margin_dpo/margin_mean': 38.97754669189453, 'margin_dpo/margin_std': 23.83334732055664, 'logps/chosen': -62.62638854980469, 'logps/rejected': -131.90960693359375, 'logps/ref_chosen': -50.74037170410156, 'logps/ref_rejected': -81.0460433959961, 'logits/chosen': -0.6328971982002258, 'logits/rejected': -0.584295392036438, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 38.97754669189453, 'margin_dpo/beta_margin_mean': 3.8977549076080322, 'margin_dpo/beta_margin_std': 2.383517265319824, 'margin_dpo/beta_margin_grad_mean': -0.08531676232814789, 'margin_dpo/beta_margin_grad_std': 0.13695916533470154, 'epoch': 0.59} + 59%|████████████████████████████████████████████▊ | 402/681 [30:11<5:39:28, 73.00s/it] 59%|████████████████████████████████████████████▉ | 403/681 [30:14<4:00:10, 51.84s/it] {'loss': 0.5741, 'grad_norm': 63.287567138671875, 'learning_rate': 2.154609112620295e-07, 'margin_dpo/margin_mean': 30.17224884033203, 'margin_dpo/margin_std': 28.130752563476562, 'logps/chosen': -62.53410339355469, 'logps/rejected': -122.82563781738281, 'logps/ref_chosen': -47.14731216430664, 'logps/ref_rejected': -77.2666015625, 'logits/chosen': -0.6422700881958008, 'logits/rejected': -0.6241501569747925, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.1722469329834, 'margin_dpo/beta_margin_mean': 3.0172247886657715, 'margin_dpo/beta_margin_std': 2.846843957901001, 'margin_dpo/beta_margin_grad_mean': -0.1763564795255661, 'margin_dpo/beta_margin_grad_std': 0.23426702618598938, 'epoch': 0.59} + 59%|████████████████████████████████████████████▉ | 403/681 [30:14<4:00:10, 51.84s/it] 59%|█████████████████████████████████████████████ | 404/681 [30:16<2:50:56, 37.03s/it] {'loss': 0.5739, 'grad_norm': 54.917449951171875, 'learning_rate': 2.1419034816528218e-07, 'margin_dpo/margin_mean': 30.53654670715332, 'margin_dpo/margin_std': 28.8435115814209, 'logps/chosen': -63.40578079223633, 'logps/rejected': -123.22205352783203, 'logps/ref_chosen': -47.875274658203125, 'logps/ref_rejected': -77.15499877929688, 'logits/chosen': -0.6123020648956299, 'logits/rejected': -0.578801155090332, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.53654670715332, 'margin_dpo/beta_margin_mean': 3.053654670715332, 'margin_dpo/beta_margin_std': 2.8906707763671875, 'margin_dpo/beta_margin_grad_mean': -0.18759508430957794, 'margin_dpo/beta_margin_grad_std': 0.23627623915672302, 'epoch': 0.59} + 59%|█████████████████████████████████████████████ | 404/681 [30:16<2:50:56, 37.03s/it] 59%|█████████████████████████████████████████████▏ | 405/681 [30:18<2:02:30, 26.63s/it] {'loss': 0.5427, 'grad_norm': 64.9948501586914, 'learning_rate': 2.129207286861638e-07, 'margin_dpo/margin_mean': 30.217140197753906, 'margin_dpo/margin_std': 27.509521484375, 'logps/chosen': -84.49642944335938, 'logps/rejected': -136.73745727539062, 'logps/ref_chosen': -65.16290283203125, 'logps/ref_rejected': -87.18678283691406, 'logits/chosen': -0.5796902179718018, 'logits/rejected': -0.549854040145874, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.217140197753906, 'margin_dpo/beta_margin_mean': 3.021714210510254, 'margin_dpo/beta_margin_std': 2.9265987873077393, 'margin_dpo/beta_margin_grad_mean': -0.18682169914245605, 'margin_dpo/beta_margin_grad_std': 0.22749853134155273, 'epoch': 0.59} + 59%|█████████████████████████████████████████████▏ | 405/681 [30:18<2:02:30, 26.63s/it] 60%|█████████████████████████████████████████████▎ | 406/681 [30:21<1:29:10, 19.46s/it] {'loss': 0.5435, 'grad_norm': 61.627079010009766, 'learning_rate': 2.1165208628032861e-07, 'margin_dpo/margin_mean': 31.772830963134766, 'margin_dpo/margin_std': 27.909154891967773, 'logps/chosen': -66.44183349609375, 'logps/rejected': -140.552490234375, 'logps/ref_chosen': -49.740814208984375, 'logps/ref_rejected': -92.07862854003906, 'logits/chosen': -0.6366710662841797, 'logits/rejected': -0.6224513649940491, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.772830963134766, 'margin_dpo/beta_margin_mean': 3.1772830486297607, 'margin_dpo/beta_margin_std': 2.8382697105407715, 'margin_dpo/beta_margin_grad_mean': -0.1675260215997696, 'margin_dpo/beta_margin_grad_std': 0.22542835772037506, 'epoch': 0.6} + 60%|█████████████████████████████████████████████▎ | 406/681 [30:21<1:29:10, 19.46s/it] 60%|█████████████████████████████████████████████▍ | 407/681 [30:24<1:05:55, 14.44s/it] {'loss': 0.6107, 'grad_norm': 68.63170623779297, 'learning_rate': 2.1038445437768375e-07, 'margin_dpo/margin_mean': 32.281612396240234, 'margin_dpo/margin_std': 29.094558715820312, 'logps/chosen': -72.40534973144531, 'logps/rejected': -125.86834716796875, 'logps/ref_chosen': -56.33069610595703, 'logps/ref_rejected': -77.5120849609375, 'logits/chosen': -0.6445499062538147, 'logits/rejected': -0.599348783493042, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.281612396240234, 'margin_dpo/beta_margin_mean': 3.228161334991455, 'margin_dpo/beta_margin_std': 2.978395462036133, 'margin_dpo/beta_margin_grad_mean': -0.18231885135173798, 'margin_dpo/beta_margin_grad_std': 0.25164178013801575, 'epoch': 0.6} + 60%|█████████████████████████████████████████████▍ | 407/681 [30:24<1:05:55, 14.44s/it] 60%|██████████████████████████████████████████████▋ | 408/681 [30:27<49:55, 10.97s/it] {'loss': 0.6172, 'grad_norm': 81.44627380371094, 'learning_rate': 2.0911786638150872e-07, 'margin_dpo/margin_mean': 27.853038787841797, 'margin_dpo/margin_std': 27.155353546142578, 'logps/chosen': -85.27023315429688, 'logps/rejected': -133.43089294433594, 'logps/ref_chosen': -69.789306640625, 'logps/ref_rejected': -90.09693908691406, 'logits/chosen': -0.6902725696563721, 'logits/rejected': -0.6373718976974487, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.853038787841797, 'margin_dpo/beta_margin_mean': 2.785304069519043, 'margin_dpo/beta_margin_std': 2.7439894676208496, 'margin_dpo/beta_margin_grad_mean': -0.20204903185367584, 'margin_dpo/beta_margin_grad_std': 0.25068265199661255, 'epoch': 0.6} + 60%|██████████████████████████████████████████████▋ | 408/681 [30:27<49:55, 10.97s/it] 60%|██████████████████████████████████████████████▊ | 409/681 [30:29<38:30, 8.49s/it] {'loss': 0.4121, 'grad_norm': 49.702667236328125, 'learning_rate': 2.0785235566757517e-07, 'margin_dpo/margin_mean': 30.92287254333496, 'margin_dpo/margin_std': 25.64594078063965, 'logps/chosen': -84.24601745605469, 'logps/rejected': -132.7557373046875, 'logps/ref_chosen': -67.31744384765625, 'logps/ref_rejected': -84.904296875, 'logits/chosen': -0.6016473770141602, 'logits/rejected': -0.5694031119346619, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.92287254333496, 'margin_dpo/beta_margin_mean': 3.092287302017212, 'margin_dpo/beta_margin_std': 2.565514326095581, 'margin_dpo/beta_margin_grad_mean': -0.1540304273366928, 'margin_dpo/beta_margin_grad_std': 0.19426687061786652, 'epoch': 0.6} + 60%|██████████████████████████████████████████████▊ | 409/681 [30:30<38:30, 8.49s/it] 60%|██████████████████████████████████████████████▉ | 410/681 [30:32<30:19, 6.71s/it] {'loss': 0.5957, 'grad_norm': 67.67236328125, 'learning_rate': 2.065879555832674e-07, 'margin_dpo/margin_mean': 27.859020233154297, 'margin_dpo/margin_std': 26.202781677246094, 'logps/chosen': -70.31283569335938, 'logps/rejected': -129.9054718017578, 'logps/ref_chosen': -51.465354919433594, 'logps/ref_rejected': -83.198974609375, 'logits/chosen': -0.6346931457519531, 'logits/rejected': -0.6326348781585693, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.859020233154297, 'margin_dpo/beta_margin_mean': 2.7859020233154297, 'margin_dpo/beta_margin_std': 2.646648406982422, 'margin_dpo/beta_margin_grad_mean': -0.20088441669940948, 'margin_dpo/beta_margin_grad_std': 0.24235385656356812, 'epoch': 0.6} + 60%|██████████████████████████████████████████████▉ | 410/681 [30:32<30:19, 6.71s/it] 60%|███████████████████████████████████████████████ | 411/681 [30:34<24:09, 5.37s/it] {'loss': 0.5393, 'grad_norm': 57.020423889160156, 'learning_rate': 2.0532469944670343e-07, 'margin_dpo/margin_mean': 29.69510841369629, 'margin_dpo/margin_std': 27.609901428222656, 'logps/chosen': -71.45536041259766, 'logps/rejected': -129.53814697265625, 'logps/ref_chosen': -52.30727005004883, 'logps/ref_rejected': -80.69495391845703, 'logits/chosen': -0.6736893653869629, 'logits/rejected': -0.640461802482605, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.695110321044922, 'margin_dpo/beta_margin_mean': 2.969511032104492, 'margin_dpo/beta_margin_std': 2.880366563796997, 'margin_dpo/beta_margin_grad_mean': -0.1864738166332245, 'margin_dpo/beta_margin_grad_std': 0.2311916947364807, 'epoch': 0.6} + 60%|███████████████████████████████████████████████ | 411/681 [30:34<24:09, 5.37s/it] 60%|███████████████████████████████████████████████▏ | 412/681 [30:37<19:59, 4.46s/it] {'loss': 0.501, 'grad_norm': 41.312705993652344, 'learning_rate': 2.0406262054585738e-07, 'margin_dpo/margin_mean': 29.459096908569336, 'margin_dpo/margin_std': 27.16181182861328, 'logps/chosen': -68.71327209472656, 'logps/rejected': -145.08905029296875, 'logps/ref_chosen': -53.144126892089844, 'logps/ref_rejected': -100.06080627441406, 'logits/chosen': -0.702052652835846, 'logits/rejected': -0.6910427808761597, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.459095001220703, 'margin_dpo/beta_margin_mean': 2.9459095001220703, 'margin_dpo/beta_margin_std': 2.7281651496887207, 'margin_dpo/beta_margin_grad_mean': -0.18716482818126678, 'margin_dpo/beta_margin_grad_std': 0.21060419082641602, 'epoch': 0.6} + 60%|███████████████████████████████████████████████▏ | 412/681 [30:37<19:59, 4.46s/it] 61%|███████████████████████████████████████████████▎ | 413/681 [30:39<17:33, 3.93s/it] {'loss': 0.4911, 'grad_norm': 59.28904724121094, 'learning_rate': 2.0280175213768205e-07, 'margin_dpo/margin_mean': 29.902387619018555, 'margin_dpo/margin_std': 25.28069496154785, 'logps/chosen': -80.49532318115234, 'logps/rejected': -148.28915405273438, 'logps/ref_chosen': -61.58196258544922, 'logps/ref_rejected': -99.47340393066406, 'logits/chosen': -0.5773541927337646, 'logits/rejected': -0.5431898832321167, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.902387619018555, 'margin_dpo/beta_margin_mean': 2.990238666534424, 'margin_dpo/beta_margin_std': 2.5804879665374756, 'margin_dpo/beta_margin_grad_mean': -0.15901578962802887, 'margin_dpo/beta_margin_grad_std': 0.21321162581443787, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▎ | 413/681 [30:39<17:33, 3.93s/it] 61%|███████████████████████████████████████████████▍ | 414/681 [30:42<15:39, 3.52s/it] {'loss': 0.3637, 'grad_norm': 55.32724380493164, 'learning_rate': 2.0154212744723247e-07, 'margin_dpo/margin_mean': 35.684043884277344, 'margin_dpo/margin_std': 25.48971176147461, 'logps/chosen': -62.55944061279297, 'logps/rejected': -139.25851440429688, 'logps/ref_chosen': -46.63148880004883, 'logps/ref_rejected': -87.64652252197266, 'logits/chosen': -0.6178678274154663, 'logits/rejected': -0.580098032951355, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.684043884277344, 'margin_dpo/beta_margin_mean': 3.56840443611145, 'margin_dpo/beta_margin_std': 2.785224437713623, 'margin_dpo/beta_margin_grad_mean': -0.13259248435497284, 'margin_dpo/beta_margin_grad_std': 0.19208675622940063, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▍ | 414/681 [30:42<15:39, 3.52s/it] 61%|███████████████████████████████████████████████▌ | 415/681 [30:45<14:36, 3.29s/it] {'loss': 0.3982, 'grad_norm': 44.93287658691406, 'learning_rate': 2.002837796667909e-07, 'margin_dpo/margin_mean': 29.6812686920166, 'margin_dpo/margin_std': 24.717784881591797, 'logps/chosen': -95.38108825683594, 'logps/rejected': -146.9215850830078, 'logps/ref_chosen': -78.6182861328125, 'logps/ref_rejected': -100.47752380371094, 'logits/chosen': -0.5938626527786255, 'logits/rejected': -0.5675798654556274, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.68126678466797, 'margin_dpo/beta_margin_mean': 2.9681267738342285, 'margin_dpo/beta_margin_std': 2.4819741249084473, 'margin_dpo/beta_margin_grad_mean': -0.1585043966770172, 'margin_dpo/beta_margin_grad_std': 0.17520886659622192, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▌ | 415/681 [30:45<14:36, 3.29s/it] 61%|███████████████████████████████████████████████▋ | 416/681 [30:47<13:33, 3.07s/it] {'loss': 0.3876, 'grad_norm': 49.30588150024414, 'learning_rate': 1.990267419549914e-07, 'margin_dpo/margin_mean': 36.51543426513672, 'margin_dpo/margin_std': 27.713136672973633, 'logps/chosen': -75.66851806640625, 'logps/rejected': -144.47354125976562, 'logps/ref_chosen': -58.27912521362305, 'logps/ref_rejected': -90.56871795654297, 'logits/chosen': -0.6397312879562378, 'logits/rejected': -0.6059544086456299, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.51543426513672, 'margin_dpo/beta_margin_mean': 3.651543617248535, 'margin_dpo/beta_margin_std': 2.8901610374450684, 'margin_dpo/beta_margin_grad_mean': -0.1345895677804947, 'margin_dpo/beta_margin_grad_std': 0.2077297866344452, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▋ | 416/681 [30:47<13:33, 3.07s/it] 61%|███████████████████████████████████████████████▊ | 417/681 [30:50<12:51, 2.92s/it] {'loss': 0.3154, 'grad_norm': 38.555389404296875, 'learning_rate': 1.9777104743594686e-07, 'margin_dpo/margin_mean': 34.968475341796875, 'margin_dpo/margin_std': 23.240657806396484, 'logps/chosen': -66.67837524414062, 'logps/rejected': -119.5999755859375, 'logps/ref_chosen': -50.1987190246582, 'logps/ref_rejected': -68.15184020996094, 'logits/chosen': -0.6252127289772034, 'logits/rejected': -0.5568169355392456, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.968475341796875, 'margin_dpo/beta_margin_mean': 3.496847629547119, 'margin_dpo/beta_margin_std': 2.3686065673828125, 'margin_dpo/beta_margin_grad_mean': -0.12362627685070038, 'margin_dpo/beta_margin_grad_std': 0.17288993299007416, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▊ | 417/681 [30:50<12:51, 2.92s/it] 61%|███████████████████████████████████████████████▉ | 418/681 [30:53<12:49, 2.93s/it] {'loss': 0.5663, 'grad_norm': 64.83741760253906, 'learning_rate': 1.965167291983757e-07, 'margin_dpo/margin_mean': 34.140960693359375, 'margin_dpo/margin_std': 31.345539093017578, 'logps/chosen': -99.16204833984375, 'logps/rejected': -156.01602172851562, 'logps/ref_chosen': -81.97846984863281, 'logps/ref_rejected': -104.69148254394531, 'logits/chosen': -0.6693556904792786, 'logits/rejected': -0.6072407960891724, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.140960693359375, 'margin_dpo/beta_margin_mean': 3.4140961170196533, 'margin_dpo/beta_margin_std': 3.2014167308807373, 'margin_dpo/beta_margin_grad_mean': -0.16488902270793915, 'margin_dpo/beta_margin_grad_std': 0.2364022433757782, 'epoch': 0.61} + 61%|███████████████████████████████████████████████▉ | 418/681 [30:53<12:49, 2.93s/it] 62%|███████████████████████████████████████████████▉ | 419/681 [30:55<12:27, 2.85s/it] {'loss': 0.3088, 'grad_norm': 46.96452331542969, 'learning_rate': 1.9526382029472988e-07, 'margin_dpo/margin_mean': 33.94792938232422, 'margin_dpo/margin_std': 23.982418060302734, 'logps/chosen': -70.24662780761719, 'logps/rejected': -142.82901000976562, 'logps/ref_chosen': -52.948646545410156, 'logps/ref_rejected': -91.58309936523438, 'logits/chosen': -0.5874903202056885, 'logits/rejected': -0.5439319610595703, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.94792938232422, 'margin_dpo/beta_margin_mean': 3.3947930335998535, 'margin_dpo/beta_margin_std': 2.4165024757385254, 'margin_dpo/beta_margin_grad_mean': -0.11957548558712006, 'margin_dpo/beta_margin_grad_std': 0.16336920857429504, 'epoch': 0.62} + 62%|███████████████████████████████████████████████▉ | 419/681 [30:55<12:27, 2.85s/it] 62%|████████████████████████████████████████████████ | 420/681 [30:58<12:08, 2.79s/it] {'loss': 0.4567, 'grad_norm': 61.41410827636719, 'learning_rate': 1.9401235374032425e-07, 'margin_dpo/margin_mean': 32.95981979370117, 'margin_dpo/margin_std': 27.405033111572266, 'logps/chosen': -95.875244140625, 'logps/rejected': -120.385009765625, 'logps/ref_chosen': -77.7699203491211, 'logps/ref_rejected': -69.31985473632812, 'logits/chosen': -0.6708568930625916, 'logits/rejected': -0.594412624835968, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.95982360839844, 'margin_dpo/beta_margin_mean': 3.2959823608398438, 'margin_dpo/beta_margin_std': 2.774785041809082, 'margin_dpo/beta_margin_grad_mean': -0.1499803215265274, 'margin_dpo/beta_margin_grad_std': 0.21647407114505768, 'epoch': 0.62} + 62%|████████████████████████████████████████████████ | 420/681 [30:58<12:08, 2.79s/it] 62%|████████████████████████████████████████████████▏ | 421/681 [31:01<11:55, 2.75s/it] {'loss': 0.6226, 'grad_norm': 79.4913330078125, 'learning_rate': 1.9276236251246653e-07, 'margin_dpo/margin_mean': 27.947509765625, 'margin_dpo/margin_std': 26.780242919921875, 'logps/chosen': -73.95745849609375, 'logps/rejected': -137.42054748535156, 'logps/ref_chosen': -53.765865325927734, 'logps/ref_rejected': -89.28144836425781, 'logits/chosen': -0.6430982351303101, 'logits/rejected': -0.6089684963226318, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.947509765625, 'margin_dpo/beta_margin_mean': 2.794750928878784, 'margin_dpo/beta_margin_std': 2.7135543823242188, 'margin_dpo/beta_margin_grad_mean': -0.1968107521533966, 'margin_dpo/beta_margin_grad_std': 0.24985744059085846, 'epoch': 0.62} + 62%|████████████████████████████████████████████████▏ | 421/681 [31:01<11:55, 2.75s/it] 62%|████████████████████████████████████████████████▎ | 422/681 [31:03<11:56, 2.77s/it] {'loss': 0.5663, 'grad_norm': 66.62350463867188, 'learning_rate': 1.9151387954958792e-07, 'margin_dpo/margin_mean': 30.136600494384766, 'margin_dpo/margin_std': 28.641185760498047, 'logps/chosen': -89.37240600585938, 'logps/rejected': -138.73875427246094, 'logps/ref_chosen': -68.6337661743164, 'logps/ref_rejected': -87.86351013183594, 'logits/chosen': -0.6613567471504211, 'logits/rejected': -0.6198326349258423, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.136600494384766, 'margin_dpo/beta_margin_mean': 3.013660192489624, 'margin_dpo/beta_margin_std': 2.8644890785217285, 'margin_dpo/beta_margin_grad_mean': -0.1885911077260971, 'margin_dpo/beta_margin_grad_std': 0.2437172681093216, 'epoch': 0.62} + 62%|████████████████████████████████████████████████▎ | 422/681 [31:04<11:56, 2.77s/it] 62%|████████████████████████████████████████████████▍ | 423/681 [31:06<11:26, 2.66s/it] {'loss': 0.5527, 'grad_norm': 66.34683227539062, 'learning_rate': 1.902669377503756e-07, 'margin_dpo/margin_mean': 31.304006576538086, 'margin_dpo/margin_std': 29.631959915161133, 'logps/chosen': -74.14385986328125, 'logps/rejected': -136.7641143798828, 'logps/ref_chosen': -54.99030303955078, 'logps/ref_rejected': -86.30654907226562, 'logits/chosen': -0.6761616468429565, 'logits/rejected': -0.6586691737174988, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.304006576538086, 'margin_dpo/beta_margin_mean': 3.1304006576538086, 'margin_dpo/beta_margin_std': 2.9750967025756836, 'margin_dpo/beta_margin_grad_mean': -0.18222779035568237, 'margin_dpo/beta_margin_grad_std': 0.2336231768131256, 'epoch': 0.62} + 62%|████████████████████████████████████████████████▍ | 423/681 [31:06<11:26, 2.66s/it] 62%|████████████████████████████████████████████████▌ | 424/681 [31:09<11:23, 2.66s/it] {'loss': 0.4263, 'grad_norm': 48.2248649597168, 'learning_rate': 1.890215699729057e-07, 'margin_dpo/margin_mean': 34.16087341308594, 'margin_dpo/margin_std': 30.704998016357422, 'logps/chosen': -73.47090148925781, 'logps/rejected': -118.09882354736328, 'logps/ref_chosen': -56.01191711425781, 'logps/ref_rejected': -66.47896575927734, 'logits/chosen': -0.6284000873565674, 'logits/rejected': -0.5798854231834412, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.16087341308594, 'margin_dpo/beta_margin_mean': 3.4160873889923096, 'margin_dpo/beta_margin_std': 3.078399419784546, 'margin_dpo/beta_margin_grad_mean': -0.15188807249069214, 'margin_dpo/beta_margin_grad_std': 0.21018096804618835, 'epoch': 0.62} + 62%|████████████████████████████████████████████████▌ | 424/681 [31:09<11:23, 2.66s/it] 62%|████████████████████████████████████████████████▋ | 425/681 [31:11<11:10, 2.62s/it] {'loss': 0.5067, 'grad_norm': 56.79523849487305, 'learning_rate': 1.8777780903377732e-07, 'margin_dpo/margin_mean': 30.633705139160156, 'margin_dpo/margin_std': 24.710655212402344, 'logps/chosen': -65.49158477783203, 'logps/rejected': -145.18174743652344, 'logps/ref_chosen': -46.868995666503906, 'logps/ref_rejected': -95.92545318603516, 'logits/chosen': -0.6415660381317139, 'logits/rejected': -0.6306988000869751, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.63370704650879, 'margin_dpo/beta_margin_mean': 3.063370704650879, 'margin_dpo/beta_margin_std': 2.508201837539673, 'margin_dpo/beta_margin_grad_mean': -0.16396918892860413, 'margin_dpo/beta_margin_grad_std': 0.22638258337974548, 'epoch': 0.62} + 62%|████████████████████████████████████████████████▋ | 425/681 [31:11<11:10, 2.62s/it] 63%|████████████████████████████████████████████████▊ | 426/681 [31:14<11:15, 2.65s/it] {'loss': 0.4413, 'grad_norm': 73.21717071533203, 'learning_rate': 1.8653568770724803e-07, 'margin_dpo/margin_mean': 33.87653732299805, 'margin_dpo/margin_std': 26.354013442993164, 'logps/chosen': -93.59241485595703, 'logps/rejected': -132.15199279785156, 'logps/ref_chosen': -76.58354187011719, 'logps/ref_rejected': -81.26658630371094, 'logits/chosen': -0.6280812621116638, 'logits/rejected': -0.5743027925491333, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.87653732299805, 'margin_dpo/beta_margin_mean': 3.3876538276672363, 'margin_dpo/beta_margin_std': 2.695366144180298, 'margin_dpo/beta_margin_grad_mean': -0.13312982022762299, 'margin_dpo/beta_margin_grad_std': 0.21179711818695068, 'epoch': 0.63} + 63%|████████████████████████████████████████████████▊ | 426/681 [31:14<11:15, 2.65s/it] 63%|████████████████████████████████████████████████▉ | 427/681 [31:16<11:12, 2.65s/it] {'loss': 0.5885, 'grad_norm': 56.27901840209961, 'learning_rate': 1.8529523872436977e-07, 'margin_dpo/margin_mean': 24.72673797607422, 'margin_dpo/margin_std': 23.543621063232422, 'logps/chosen': -81.7194595336914, 'logps/rejected': -120.1583251953125, 'logps/ref_chosen': -64.8538818359375, 'logps/ref_rejected': -78.56600952148438, 'logits/chosen': -0.6733847856521606, 'logits/rejected': -0.6199424266815186, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 24.72673797607422, 'margin_dpo/beta_margin_mean': 2.4726738929748535, 'margin_dpo/beta_margin_std': 2.3600223064422607, 'margin_dpo/beta_margin_grad_mean': -0.190776988863945, 'margin_dpo/beta_margin_grad_std': 0.20414692163467407, 'epoch': 0.63} + 63%|████████████████████████████████████████████████▉ | 427/681 [31:16<11:12, 2.65s/it] 63%|█████████████████████████████████████████████████ | 428/681 [31:19<11:15, 2.67s/it] {'loss': 0.3299, 'grad_norm': 44.09659957885742, 'learning_rate': 1.8405649477212697e-07, 'margin_dpo/margin_mean': 35.59562683105469, 'margin_dpo/margin_std': 27.276784896850586, 'logps/chosen': -83.10867309570312, 'logps/rejected': -159.34945678710938, 'logps/ref_chosen': -62.63666534423828, 'logps/ref_rejected': -103.28182220458984, 'logits/chosen': -0.6260280609130859, 'logits/rejected': -0.5897619724273682, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.59562683105469, 'margin_dpo/beta_margin_mean': 3.559562921524048, 'margin_dpo/beta_margin_std': 2.730299234390259, 'margin_dpo/beta_margin_grad_mean': -0.124484583735466, 'margin_dpo/beta_margin_grad_std': 0.1756177842617035, 'epoch': 0.63} + 63%|█████████████████████████████████████████████████ | 428/681 [31:19<11:15, 2.67s/it] 63%|█████████████████████████████████████████████████▏ | 429/681 [31:22<11:27, 2.73s/it] {'loss': 0.595, 'grad_norm': 61.60802459716797, 'learning_rate': 1.828194884925749e-07, 'margin_dpo/margin_mean': 29.67691421508789, 'margin_dpo/margin_std': 28.60194969177246, 'logps/chosen': -101.16323852539062, 'logps/rejected': -141.40106201171875, 'logps/ref_chosen': -81.23401641845703, 'logps/ref_rejected': -91.79493713378906, 'logits/chosen': -0.636346697807312, 'logits/rejected': -0.5803790092468262, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.67691421508789, 'margin_dpo/beta_margin_mean': 2.967691421508789, 'margin_dpo/beta_margin_std': 2.905752182006836, 'margin_dpo/beta_margin_grad_mean': -0.1930510401725769, 'margin_dpo/beta_margin_grad_std': 0.24151724576950073, 'epoch': 0.63} + 63%|█████████████████████████████████████████████████▏ | 429/681 [31:22<11:27, 2.73s/it] 63%|█████████████████████████████████████████████████▎ | 430/681 [31:25<11:57, 2.86s/it] {'loss': 0.4761, 'grad_norm': 51.62448501586914, 'learning_rate': 1.8158425248197928e-07, 'margin_dpo/margin_mean': 30.790908813476562, 'margin_dpo/margin_std': 26.328550338745117, 'logps/chosen': -79.01585388183594, 'logps/rejected': -153.30923461914062, 'logps/ref_chosen': -60.92032241821289, 'logps/ref_rejected': -104.42280578613281, 'logits/chosen': -0.6227689981460571, 'logits/rejected': -0.6045354008674622, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.790908813476562, 'margin_dpo/beta_margin_mean': 3.0790908336639404, 'margin_dpo/beta_margin_std': 2.675757884979248, 'margin_dpo/beta_margin_grad_mean': -0.1646682769060135, 'margin_dpo/beta_margin_grad_std': 0.2231719046831131, 'epoch': 0.63} + 63%|█████████████████████████████████████████████████▎ | 430/681 [31:25<11:57, 2.86s/it] 63%|█████████████████████████████████████████████████▎ | 431/681 [31:28<11:46, 2.83s/it] {'loss': 0.3416, 'grad_norm': 45.01468276977539, 'learning_rate': 1.8035081928995788e-07, 'margin_dpo/margin_mean': 34.62909698486328, 'margin_dpo/margin_std': 26.410173416137695, 'logps/chosen': -76.03721618652344, 'logps/rejected': -146.1577911376953, 'logps/ref_chosen': -57.348751068115234, 'logps/ref_rejected': -92.84022521972656, 'logits/chosen': -0.6120933294296265, 'logits/rejected': -0.5965217351913452, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.62909698486328, 'margin_dpo/beta_margin_mean': 3.4629099369049072, 'margin_dpo/beta_margin_std': 2.6419167518615723, 'margin_dpo/beta_margin_grad_mean': -0.13492785394191742, 'margin_dpo/beta_margin_grad_std': 0.17364878952503204, 'epoch': 0.63} + 63%|█████████████████████████████████████████████████▎ | 431/681 [31:28<11:46, 2.83s/it] 63%|█████████████████████████████████████████████████▍ | 432/681 [31:31<11:40, 2.81s/it] {'loss': 0.4396, 'grad_norm': 55.2720947265625, 'learning_rate': 1.791192214186223e-07, 'margin_dpo/margin_mean': 32.3117790222168, 'margin_dpo/margin_std': 27.102590560913086, 'logps/chosen': -88.92323303222656, 'logps/rejected': -148.73974609375, 'logps/ref_chosen': -71.07479095458984, 'logps/ref_rejected': -98.57952880859375, 'logits/chosen': -0.6020532250404358, 'logits/rejected': -0.5625859498977661, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.3117790222168, 'margin_dpo/beta_margin_mean': 3.231178045272827, 'margin_dpo/beta_margin_std': 2.7881994247436523, 'margin_dpo/beta_margin_grad_mean': -0.1505272537469864, 'margin_dpo/beta_margin_grad_std': 0.20940996706485748, 'epoch': 0.63} + 63%|█████████████████████████████████████████████████▍ | 432/681 [31:31<11:40, 2.81s/it] 64%|█████████████████████████████████████████████████▌ | 433/681 [31:33<11:12, 2.71s/it] {'loss': 0.5849, 'grad_norm': 71.04937744140625, 'learning_rate': 1.7788949132172193e-07, 'margin_dpo/margin_mean': 28.365665435791016, 'margin_dpo/margin_std': 26.324649810791016, 'logps/chosen': -81.66122436523438, 'logps/rejected': -147.70458984375, 'logps/ref_chosen': -58.273193359375, 'logps/ref_rejected': -95.95089721679688, 'logits/chosen': -0.6384241580963135, 'logits/rejected': -0.6068836450576782, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.365663528442383, 'margin_dpo/beta_margin_mean': 2.83656644821167, 'margin_dpo/beta_margin_std': 2.67340350151062, 'margin_dpo/beta_margin_grad_mean': -0.19293591380119324, 'margin_dpo/beta_margin_grad_std': 0.2420828938484192, 'epoch': 0.64} + 64%|█████████████████████████████████████████████████▌ | 433/681 [31:33<11:12, 2.71s/it] 64%|█████████████████████████████████████████████████▋ | 434/681 [31:36<11:01, 2.68s/it] {'loss': 0.4218, 'grad_norm': 48.197303771972656, 'learning_rate': 1.7666166140378853e-07, 'margin_dpo/margin_mean': 29.513980865478516, 'margin_dpo/margin_std': 25.25749969482422, 'logps/chosen': -79.50520324707031, 'logps/rejected': -125.54408264160156, 'logps/ref_chosen': -61.97370147705078, 'logps/ref_rejected': -78.49861145019531, 'logits/chosen': -0.6621353626251221, 'logits/rejected': -0.6182979345321655, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.513980865478516, 'margin_dpo/beta_margin_mean': 2.9513981342315674, 'margin_dpo/beta_margin_std': 2.5280110836029053, 'margin_dpo/beta_margin_grad_mean': -0.15651409327983856, 'margin_dpo/beta_margin_grad_std': 0.19872474670410156, 'epoch': 0.64} + 64%|█████████████████████████████████████████████████▋ | 434/681 [31:36<11:01, 2.68s/it] 64%|█████████████████████████████████████████████████▊ | 435/681 [31:38<10:27, 2.55s/it] {'loss': 0.5053, 'grad_norm': 63.86077117919922, 'learning_rate': 1.7543576401928218e-07, 'margin_dpo/margin_mean': 32.3472900390625, 'margin_dpo/margin_std': 29.455238342285156, 'logps/chosen': -69.592041015625, 'logps/rejected': -138.00416564941406, 'logps/ref_chosen': -51.502052307128906, 'logps/ref_rejected': -87.56689453125, 'logits/chosen': -0.6548939943313599, 'logits/rejected': -0.6191599369049072, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.3472900390625, 'margin_dpo/beta_margin_mean': 3.234729051589966, 'margin_dpo/beta_margin_std': 2.9603843688964844, 'margin_dpo/beta_margin_grad_mean': -0.1661788821220398, 'margin_dpo/beta_margin_grad_std': 0.21013152599334717, 'epoch': 0.64} + 64%|█████████████████████████████████████████████████▊ | 435/681 [31:38<10:27, 2.55s/it] 64%|█████████████████████████████████████████████████▉ | 436/681 [31:41<10:30, 2.57s/it] {'loss': 0.3539, 'grad_norm': 40.332698822021484, 'learning_rate': 1.742118314717391e-07, 'margin_dpo/margin_mean': 31.527891159057617, 'margin_dpo/margin_std': 24.248245239257812, 'logps/chosen': -88.88678741455078, 'logps/rejected': -131.7387237548828, 'logps/ref_chosen': -71.40371704101562, 'logps/ref_rejected': -82.72775268554688, 'logits/chosen': -0.632080078125, 'logits/rejected': -0.5719594955444336, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.527891159057617, 'margin_dpo/beta_margin_mean': 3.152789354324341, 'margin_dpo/beta_margin_std': 2.4287147521972656, 'margin_dpo/beta_margin_grad_mean': -0.13681164383888245, 'margin_dpo/beta_margin_grad_std': 0.17888766527175903, 'epoch': 0.64} + 64%|█████████████████████████████████████████████████▉ | 436/681 [31:41<10:30, 2.57s/it] 64%|██████████████████████████████████████████████████ | 437/681 [31:43<10:42, 2.63s/it] {'loss': 0.5269, 'grad_norm': 51.00373840332031, 'learning_rate': 1.7298989601292036e-07, 'margin_dpo/margin_mean': 28.168094635009766, 'margin_dpo/margin_std': 23.416202545166016, 'logps/chosen': -81.99353790283203, 'logps/rejected': -127.4609375, 'logps/ref_chosen': -64.7442626953125, 'logps/ref_rejected': -82.04356384277344, 'logits/chosen': -0.6353539228439331, 'logits/rejected': -0.5929083824157715, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.168094635009766, 'margin_dpo/beta_margin_mean': 2.81680965423584, 'margin_dpo/beta_margin_std': 2.37345027923584, 'margin_dpo/beta_margin_grad_mean': -0.1786879152059555, 'margin_dpo/beta_margin_grad_std': 0.23251357674598694, 'epoch': 0.64} + 64%|██████████████████████████████████████████████████ | 437/681 [31:43<10:42, 2.63s/it] 64%|██████████████████████████████████████████████████▏ | 438/681 [31:46<10:23, 2.56s/it] {'loss': 0.3695, 'grad_norm': 63.38606643676758, 'learning_rate': 1.7176998984196144e-07, 'margin_dpo/margin_mean': 34.36668395996094, 'margin_dpo/margin_std': 26.956180572509766, 'logps/chosen': -78.18193817138672, 'logps/rejected': -136.60678100585938, 'logps/ref_chosen': -59.0186653137207, 'logps/ref_rejected': -83.07682037353516, 'logits/chosen': -0.6576756238937378, 'logits/rejected': -0.5832280516624451, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.36668395996094, 'margin_dpo/beta_margin_mean': 3.4366683959960938, 'margin_dpo/beta_margin_std': 2.721860408782959, 'margin_dpo/beta_margin_grad_mean': -0.13736094534397125, 'margin_dpo/beta_margin_grad_std': 0.18339543044567108, 'epoch': 0.64} + 64%|██████████████████████████████████████████████████▏ | 438/681 [31:46<10:23, 2.56s/it] 64%|██████████████████████████████████████████████████▎ | 439/681 [31:48<10:10, 2.52s/it] {'loss': 0.5261, 'grad_norm': 71.34723663330078, 'learning_rate': 1.7055214510452458e-07, 'margin_dpo/margin_mean': 26.974590301513672, 'margin_dpo/margin_std': 23.787738800048828, 'logps/chosen': -77.27565002441406, 'logps/rejected': -134.45162963867188, 'logps/ref_chosen': -53.784080505371094, 'logps/ref_rejected': -83.98545837402344, 'logits/chosen': -0.6156207323074341, 'logits/rejected': -0.5937438607215881, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.97458839416504, 'margin_dpo/beta_margin_mean': 2.6974589824676514, 'margin_dpo/beta_margin_std': 2.4870941638946533, 'margin_dpo/beta_margin_grad_mean': -0.18373528122901917, 'margin_dpo/beta_margin_grad_std': 0.2151244729757309, 'epoch': 0.64} + 64%|██████████████████████████████████████████████████▎ | 439/681 [31:48<10:10, 2.52s/it] 65%|██████████████████████████████████████████████████▍ | 440/681 [31:51<09:59, 2.49s/it] {'loss': 0.6669, 'grad_norm': 96.4582290649414, 'learning_rate': 1.6933639389195134e-07, 'margin_dpo/margin_mean': 25.880369186401367, 'margin_dpo/margin_std': 27.07331085205078, 'logps/chosen': -96.89436340332031, 'logps/rejected': -140.70578002929688, 'logps/ref_chosen': -78.56671905517578, 'logps/ref_rejected': -96.49775695800781, 'logits/chosen': -0.6607520580291748, 'logits/rejected': -0.6199520826339722, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 25.880369186401367, 'margin_dpo/beta_margin_mean': 2.5880370140075684, 'margin_dpo/beta_margin_std': 2.716387987136841, 'margin_dpo/beta_margin_grad_mean': -0.2098814845085144, 'margin_dpo/beta_margin_grad_std': 0.25330764055252075, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▍ | 440/681 [31:51<09:59, 2.49s/it] 65%|██████████████████████████████████████████████████▌ | 441/681 [31:54<10:32, 2.63s/it] {'loss': 0.4379, 'grad_norm': 49.82929229736328, 'learning_rate': 1.681227682404166e-07, 'margin_dpo/margin_mean': 30.808923721313477, 'margin_dpo/margin_std': 23.68011474609375, 'logps/chosen': -80.72434997558594, 'logps/rejected': -147.17962646484375, 'logps/ref_chosen': -60.824440002441406, 'logps/ref_rejected': -96.47080993652344, 'logits/chosen': -0.5963351726531982, 'logits/rejected': -0.5610902309417725, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.808923721313477, 'margin_dpo/beta_margin_mean': 3.080892562866211, 'margin_dpo/beta_margin_std': 2.426534414291382, 'margin_dpo/beta_margin_grad_mean': -0.13756851851940155, 'margin_dpo/beta_margin_grad_std': 0.19719012081623077, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▌ | 441/681 [31:54<10:32, 2.63s/it] 65%|██████████████████████████████████████████████████▋ | 442/681 [31:56<10:31, 2.64s/it] {'loss': 0.2823, 'grad_norm': 36.576942443847656, 'learning_rate': 1.669113001300851e-07, 'margin_dpo/margin_mean': 37.83577346801758, 'margin_dpo/margin_std': 26.404239654541016, 'logps/chosen': -64.97787475585938, 'logps/rejected': -132.34170532226562, 'logps/ref_chosen': -47.01121520996094, 'logps/ref_rejected': -76.53926086425781, 'logits/chosen': -0.6140519380569458, 'logits/rejected': -0.5783543586730957, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.83577346801758, 'margin_dpo/beta_margin_mean': 3.7835774421691895, 'margin_dpo/beta_margin_std': 2.697312593460083, 'margin_dpo/beta_margin_grad_mean': -0.1093648299574852, 'margin_dpo/beta_margin_grad_std': 0.16080023348331451, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▋ | 442/681 [31:56<10:31, 2.64s/it] 65%|██████████████████████████████████████████████████▋ | 443/681 [31:59<10:30, 2.65s/it] {'loss': 0.6573, 'grad_norm': 79.94059753417969, 'learning_rate': 1.6570202148426815e-07, 'margin_dpo/margin_mean': 28.54714012145996, 'margin_dpo/margin_std': 27.68130111694336, 'logps/chosen': -93.62142944335938, 'logps/rejected': -137.5754852294922, 'logps/ref_chosen': -71.27301788330078, 'logps/ref_rejected': -86.679931640625, 'logits/chosen': -0.6004323959350586, 'logits/rejected': -0.5627496242523193, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.54714012145996, 'margin_dpo/beta_margin_mean': 2.8547141551971436, 'margin_dpo/beta_margin_std': 2.7842366695404053, 'margin_dpo/beta_margin_grad_mean': -0.20045503973960876, 'margin_dpo/beta_margin_grad_std': 0.263896644115448, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▋ | 443/681 [31:59<10:30, 2.65s/it] 65%|██████████████████████████████████████████████████▊ | 444/681 [32:02<10:25, 2.64s/it] {'loss': 0.4389, 'grad_norm': 47.294471740722656, 'learning_rate': 1.6449496416858282e-07, 'margin_dpo/margin_mean': 34.26472473144531, 'margin_dpo/margin_std': 28.598800659179688, 'logps/chosen': -76.857421875, 'logps/rejected': -151.163330078125, 'logps/ref_chosen': -57.213706970214844, 'logps/ref_rejected': -97.25489044189453, 'logits/chosen': -0.5860676169395447, 'logits/rejected': -0.5605667233467102, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.26472091674805, 'margin_dpo/beta_margin_mean': 3.4264724254608154, 'margin_dpo/beta_margin_std': 2.8675122261047363, 'margin_dpo/beta_margin_grad_mean': -0.14946991205215454, 'margin_dpo/beta_margin_grad_std': 0.21209140121936798, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▊ | 444/681 [32:02<10:25, 2.64s/it] 65%|██████████████████████████████████████████████████▉ | 445/681 [32:04<10:20, 2.63s/it] {'loss': 0.4624, 'grad_norm': 61.75363540649414, 'learning_rate': 1.6329015999011182e-07, 'margin_dpo/margin_mean': 31.917476654052734, 'margin_dpo/margin_std': 27.65774154663086, 'logps/chosen': -84.33077239990234, 'logps/rejected': -141.63113403320312, 'logps/ref_chosen': -67.29979705810547, 'logps/ref_rejected': -92.68267822265625, 'logits/chosen': -0.6285964250564575, 'logits/rejected': -0.5963205695152283, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.917476654052734, 'margin_dpo/beta_margin_mean': 3.1917476654052734, 'margin_dpo/beta_margin_std': 2.7972888946533203, 'margin_dpo/beta_margin_grad_mean': -0.16633237898349762, 'margin_dpo/beta_margin_grad_std': 0.21091465651988983, 'epoch': 0.65} + 65%|██████████████████████████████████████████████████▉ | 445/681 [32:04<10:20, 2.63s/it] 65%|███████████████████████████████████████████████████ | 446/681 [32:07<10:22, 2.65s/it] {'loss': 0.4368, 'grad_norm': 54.28517532348633, 'learning_rate': 1.6208764069656578e-07, 'margin_dpo/margin_mean': 30.172958374023438, 'margin_dpo/margin_std': 26.31899070739746, 'logps/chosen': -76.78812408447266, 'logps/rejected': -149.1267852783203, 'logps/ref_chosen': -59.098487854003906, 'logps/ref_rejected': -101.26419067382812, 'logits/chosen': -0.5897877216339111, 'logits/rejected': -0.568926215171814, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.172958374023438, 'margin_dpo/beta_margin_mean': 3.0172958374023438, 'margin_dpo/beta_margin_std': 2.6881942749023438, 'margin_dpo/beta_margin_grad_mean': -0.16494978964328766, 'margin_dpo/beta_margin_grad_std': 0.1965719312429428, 'epoch': 0.65} + 65%|███████████████████████████████████████████████████ | 446/681 [32:07<10:22, 2.65s/it] 66%|███████████████████████████████████████████████████▏ | 447/681 [32:10<10:20, 2.65s/it] {'loss': 0.4538, 'grad_norm': 51.364315032958984, 'learning_rate': 1.608874379754465e-07, 'margin_dpo/margin_mean': 31.73101806640625, 'margin_dpo/margin_std': 28.281917572021484, 'logps/chosen': -76.43832397460938, 'logps/rejected': -150.78875732421875, 'logps/ref_chosen': -56.07533264160156, 'logps/ref_rejected': -98.69475555419922, 'logits/chosen': -0.660834014415741, 'logits/rejected': -0.6618390083312988, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.731016159057617, 'margin_dpo/beta_margin_mean': 3.1731016635894775, 'margin_dpo/beta_margin_std': 2.897716760635376, 'margin_dpo/beta_margin_grad_mean': -0.16636352241039276, 'margin_dpo/beta_margin_grad_std': 0.20971129834651947, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▏ | 447/681 [32:10<10:20, 2.65s/it] 66%|███████████████████████████████████████████████████▎ | 448/681 [32:12<10:32, 2.72s/it] {'loss': 0.3892, 'grad_norm': 47.65716552734375, 'learning_rate': 1.5968958345321177e-07, 'margin_dpo/margin_mean': 32.101654052734375, 'margin_dpo/margin_std': 25.43906021118164, 'logps/chosen': -80.88053131103516, 'logps/rejected': -155.2429962158203, 'logps/ref_chosen': -60.00384521484375, 'logps/ref_rejected': -102.26465606689453, 'logits/chosen': -0.6168828010559082, 'logits/rejected': -0.600253701210022, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.101654052734375, 'margin_dpo/beta_margin_mean': 3.2101657390594482, 'margin_dpo/beta_margin_std': 2.5543386936187744, 'margin_dpo/beta_margin_grad_mean': -0.13886789977550507, 'margin_dpo/beta_margin_grad_std': 0.18517683446407318, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▎ | 448/681 [32:12<10:32, 2.72s/it] 66%|███████████████████████████████████████████████████▍ | 449/681 [32:15<10:17, 2.66s/it] {'loss': 0.6043, 'grad_norm': 79.98429107666016, 'learning_rate': 1.584941086944423e-07, 'margin_dpo/margin_mean': 31.41507339477539, 'margin_dpo/margin_std': 30.071718215942383, 'logps/chosen': -89.62152099609375, 'logps/rejected': -142.1068878173828, 'logps/ref_chosen': -67.52661895751953, 'logps/ref_rejected': -88.59690856933594, 'logits/chosen': -0.5817546248435974, 'logits/rejected': -0.5362948179244995, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.41507339477539, 'margin_dpo/beta_margin_mean': 3.141507387161255, 'margin_dpo/beta_margin_std': 3.0401062965393066, 'margin_dpo/beta_margin_grad_mean': -0.17372801899909973, 'margin_dpo/beta_margin_grad_std': 0.23874573409557343, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▍ | 449/681 [32:15<10:17, 2.66s/it] 66%|███████████████████████████████████████████████████▌ | 450/681 [32:18<10:54, 2.83s/it] {'loss': 0.3207, 'grad_norm': 44.39156723022461, 'learning_rate': 1.573010452010098e-07, 'margin_dpo/margin_mean': 34.53790283203125, 'margin_dpo/margin_std': 25.840599060058594, 'logps/chosen': -73.27051544189453, 'logps/rejected': -153.4552459716797, 'logps/ref_chosen': -57.108116149902344, 'logps/ref_rejected': -102.75494384765625, 'logits/chosen': -0.6516839265823364, 'logits/rejected': -0.6243829727172852, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.53790283203125, 'margin_dpo/beta_margin_mean': 3.4537904262542725, 'margin_dpo/beta_margin_std': 2.6313655376434326, 'margin_dpo/beta_margin_grad_mean': -0.12840278446674347, 'margin_dpo/beta_margin_grad_std': 0.1644502729177475, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▌ | 450/681 [32:18<10:54, 2.83s/it] 66%|███████████████████████████████████████████████████▋ | 451/681 [32:21<10:46, 2.81s/it] {'loss': 0.5537, 'grad_norm': 75.2901382446289, 'learning_rate': 1.5611042441124687e-07, 'margin_dpo/margin_mean': 29.465293884277344, 'margin_dpo/margin_std': 25.818279266357422, 'logps/chosen': -80.07470703125, 'logps/rejected': -124.00057983398438, 'logps/ref_chosen': -58.46883010864258, 'logps/ref_rejected': -72.92941284179688, 'logits/chosen': -0.6581634283065796, 'logits/rejected': -0.6103047132492065, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.465293884277344, 'margin_dpo/beta_margin_mean': 2.9465293884277344, 'margin_dpo/beta_margin_std': 2.6042659282684326, 'margin_dpo/beta_margin_grad_mean': -0.16929282248020172, 'margin_dpo/beta_margin_grad_std': 0.2288813591003418, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▋ | 451/681 [32:21<10:46, 2.81s/it] 66%|███████████████████████████████████████████████████▊ | 452/681 [32:24<10:44, 2.81s/it] {'loss': 0.2857, 'grad_norm': 35.9453239440918, 'learning_rate': 1.549222776991186e-07, 'margin_dpo/margin_mean': 30.134784698486328, 'margin_dpo/margin_std': 21.948862075805664, 'logps/chosen': -66.35121154785156, 'logps/rejected': -143.86688232421875, 'logps/ref_chosen': -50.39055252075195, 'logps/ref_rejected': -97.77143096923828, 'logits/chosen': -0.546400785446167, 'logits/rejected': -0.5479906797409058, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.134784698486328, 'margin_dpo/beta_margin_mean': 3.0134785175323486, 'margin_dpo/beta_margin_std': 2.2046637535095215, 'margin_dpo/beta_margin_grad_mean': -0.12084120512008667, 'margin_dpo/beta_margin_grad_std': 0.13351190090179443, 'epoch': 0.66} + 66%|███████████████████████████████████████████████████▊ | 452/681 [32:24<10:44, 2.81s/it] 67%|███████████████████████████████████████████████████▉ | 453/681 [32:26<10:19, 2.72s/it] {'loss': 0.4664, 'grad_norm': 51.65986633300781, 'learning_rate': 1.5373663637339584e-07, 'margin_dpo/margin_mean': 29.085243225097656, 'margin_dpo/margin_std': 25.423097610473633, 'logps/chosen': -76.96781921386719, 'logps/rejected': -130.54562377929688, 'logps/ref_chosen': -57.71485137939453, 'logps/ref_rejected': -82.20741271972656, 'logits/chosen': -0.6441305875778198, 'logits/rejected': -0.5928350687026978, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.085243225097656, 'margin_dpo/beta_margin_mean': 2.90852427482605, 'margin_dpo/beta_margin_std': 2.558598279953003, 'margin_dpo/beta_margin_grad_mean': -0.16998730599880219, 'margin_dpo/beta_margin_grad_std': 0.2013465166091919, 'epoch': 0.67} + 67%|███████████████████████████████████████████████████▉ | 453/681 [32:26<10:19, 2.72s/it] 67%|████████████████████████████████████████████████████ | 454/681 [32:29<10:13, 2.70s/it] {'loss': 0.4047, 'grad_norm': 59.35947036743164, 'learning_rate': 1.5255353167683017e-07, 'margin_dpo/margin_mean': 32.31932830810547, 'margin_dpo/margin_std': 25.902687072753906, 'logps/chosen': -81.52304077148438, 'logps/rejected': -137.84750366210938, 'logps/ref_chosen': -60.945648193359375, 'logps/ref_rejected': -84.9507827758789, 'logits/chosen': -0.6171283721923828, 'logits/rejected': -0.5738873481750488, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.31932830810547, 'margin_dpo/beta_margin_mean': 3.2319328784942627, 'margin_dpo/beta_margin_std': 2.6018524169921875, 'margin_dpo/beta_margin_grad_mean': -0.14290541410446167, 'margin_dpo/beta_margin_grad_std': 0.2013457864522934, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████ | 454/681 [32:29<10:13, 2.70s/it] 67%|████████████████████████████████████████████████████ | 455/681 [32:31<10:01, 2.66s/it] {'loss': 0.3629, 'grad_norm': 93.24932861328125, 'learning_rate': 1.5137299478533064e-07, 'margin_dpo/margin_mean': 37.203800201416016, 'margin_dpo/margin_std': 26.29052734375, 'logps/chosen': -64.90336608886719, 'logps/rejected': -172.52194213867188, 'logps/ref_chosen': -44.88671112060547, 'logps/ref_rejected': -115.30147552490234, 'logits/chosen': -0.6162554621696472, 'logits/rejected': -0.5891969203948975, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.203800201416016, 'margin_dpo/beta_margin_mean': 3.7203803062438965, 'margin_dpo/beta_margin_std': 2.6521599292755127, 'margin_dpo/beta_margin_grad_mean': -0.1229911670088768, 'margin_dpo/beta_margin_grad_std': 0.19356586039066315, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████ | 455/681 [32:32<10:01, 2.66s/it] 67%|████████████████████████████████████████████████████▏ | 456/681 [32:34<09:57, 2.66s/it] {'loss': 0.354, 'grad_norm': 49.41230010986328, 'learning_rate': 1.5019505680714232e-07, 'margin_dpo/margin_mean': 37.51462936401367, 'margin_dpo/margin_std': 28.42435073852539, 'logps/chosen': -74.30551147460938, 'logps/rejected': -160.00119018554688, 'logps/ref_chosen': -57.036781311035156, 'logps/ref_rejected': -105.21783447265625, 'logits/chosen': -0.6331825256347656, 'logits/rejected': -0.6310149431228638, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.51462936401367, 'margin_dpo/beta_margin_mean': 3.751462936401367, 'margin_dpo/beta_margin_std': 2.8913846015930176, 'margin_dpo/beta_margin_grad_mean': -0.13223397731781006, 'margin_dpo/beta_margin_grad_std': 0.1870342195034027, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████▏ | 456/681 [32:34<09:57, 2.66s/it] 67%|████████████████████████████████████████████████████▎ | 457/681 [32:37<09:52, 2.65s/it] {'loss': 0.386, 'grad_norm': 59.397212982177734, 'learning_rate': 1.4901974878202627e-07, 'margin_dpo/margin_mean': 33.051368713378906, 'margin_dpo/margin_std': 24.472869873046875, 'logps/chosen': -72.51710510253906, 'logps/rejected': -136.43548583984375, 'logps/ref_chosen': -54.24253845214844, 'logps/ref_rejected': -85.10956573486328, 'logits/chosen': -0.6320329308509827, 'logits/rejected': -0.6049121618270874, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.051368713378906, 'margin_dpo/beta_margin_mean': 3.3051366806030273, 'margin_dpo/beta_margin_std': 2.4597647190093994, 'margin_dpo/beta_margin_grad_mean': -0.13050609827041626, 'margin_dpo/beta_margin_grad_std': 0.19707661867141724, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████▎ | 457/681 [32:37<09:52, 2.65s/it] 67%|████████████████████████████████████████████████████▍ | 458/681 [32:39<09:37, 2.59s/it] {'loss': 0.4411, 'grad_norm': 56.77046585083008, 'learning_rate': 1.4784710168044212e-07, 'margin_dpo/margin_mean': 38.03219223022461, 'margin_dpo/margin_std': 32.41196060180664, 'logps/chosen': -74.71857452392578, 'logps/rejected': -155.025146484375, 'logps/ref_chosen': -55.40888214111328, 'logps/ref_rejected': -97.68325805664062, 'logits/chosen': -0.6297258138656616, 'logits/rejected': -0.5929204225540161, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 38.03219223022461, 'margin_dpo/beta_margin_mean': 3.8032190799713135, 'margin_dpo/beta_margin_std': 3.263706922531128, 'margin_dpo/beta_margin_grad_mean': -0.14024901390075684, 'margin_dpo/beta_margin_grad_std': 0.23043015599250793, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████▍ | 458/681 [32:39<09:37, 2.59s/it] 67%|████████████████████████████████████████████████████▌ | 459/681 [32:42<09:37, 2.60s/it] {'loss': 0.4592, 'grad_norm': 47.203277587890625, 'learning_rate': 1.466771464027316e-07, 'margin_dpo/margin_mean': 28.914405822753906, 'margin_dpo/margin_std': 23.49092674255371, 'logps/chosen': -67.03455352783203, 'logps/rejected': -135.55999755859375, 'logps/ref_chosen': -46.55748748779297, 'logps/ref_rejected': -86.16854095458984, 'logits/chosen': -0.592144250869751, 'logits/rejected': -0.5651764869689941, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.914405822753906, 'margin_dpo/beta_margin_mean': 2.8914406299591064, 'margin_dpo/beta_margin_std': 2.370115280151367, 'margin_dpo/beta_margin_grad_mean': -0.16323688626289368, 'margin_dpo/beta_margin_grad_std': 0.19608436524868011, 'epoch': 0.67} + 67%|████████████████████████████████████████████████████▌ | 459/681 [32:42<09:37, 2.60s/it] 68%|████████████████████████████████████████████████████▋ | 460/681 [32:45<09:49, 2.67s/it] {'loss': 0.4209, 'grad_norm': 59.67298126220703, 'learning_rate': 1.4550991377830423e-07, 'margin_dpo/margin_mean': 32.565120697021484, 'margin_dpo/margin_std': 25.7642879486084, 'logps/chosen': -70.59028625488281, 'logps/rejected': -155.63986206054688, 'logps/ref_chosen': -51.63489532470703, 'logps/ref_rejected': -104.11935424804688, 'logits/chosen': -0.5806307792663574, 'logits/rejected': -0.5847660303115845, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.565120697021484, 'margin_dpo/beta_margin_mean': 3.25651216506958, 'margin_dpo/beta_margin_std': 2.5778074264526367, 'margin_dpo/beta_margin_grad_mean': -0.15195363759994507, 'margin_dpo/beta_margin_grad_std': 0.20993934571743011, 'epoch': 0.68} + 68%|████████████████████████████████████████████████████▋ | 460/681 [32:45<09:49, 2.67s/it] 68%|████████████████████████████████████████████████████▊ | 461/681 [32:47<09:50, 2.68s/it] {'loss': 0.5473, 'grad_norm': 59.93415069580078, 'learning_rate': 1.4434543456482518e-07, 'margin_dpo/margin_mean': 27.815326690673828, 'margin_dpo/margin_std': 27.13003921508789, 'logps/chosen': -79.71414184570312, 'logps/rejected': -138.8244171142578, 'logps/ref_chosen': -55.18195343017578, 'logps/ref_rejected': -86.47689819335938, 'logits/chosen': -0.5920594930648804, 'logits/rejected': -0.5768572688102722, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.815326690673828, 'margin_dpo/beta_margin_mean': 2.7815327644348145, 'margin_dpo/beta_margin_std': 2.8518621921539307, 'margin_dpo/beta_margin_grad_mean': -0.18966011703014374, 'margin_dpo/beta_margin_grad_std': 0.22385801374912262, 'epoch': 0.68} + 68%|████████████████████████████████████████████████████▊ | 461/681 [32:47<09:50, 2.68s/it] 68%|████████████████████████████████████████████████████▉ | 462/681 [32:50<09:37, 2.64s/it] {'loss': 0.554, 'grad_norm': 64.90670776367188, 'learning_rate': 1.4318373944740484e-07, 'margin_dpo/margin_mean': 26.862995147705078, 'margin_dpo/margin_std': 25.538467407226562, 'logps/chosen': -93.2876968383789, 'logps/rejected': -129.06378173828125, 'logps/ref_chosen': -69.92803955078125, 'logps/ref_rejected': -78.84111785888672, 'logits/chosen': -0.6181149482727051, 'logits/rejected': -0.5787901878356934, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.86299705505371, 'margin_dpo/beta_margin_mean': 2.6862998008728027, 'margin_dpo/beta_margin_std': 2.5792369842529297, 'margin_dpo/beta_margin_grad_mean': -0.1927950084209442, 'margin_dpo/beta_margin_grad_std': 0.22063319385051727, 'epoch': 0.68} + 68%|████████████████████████████████████████████████████▉ | 462/681 [32:50<09:37, 2.64s/it] 68%|█████████████████████████████████████████████████████ | 463/681 [32:52<09:21, 2.57s/it] {'loss': 0.3546, 'grad_norm': 50.19252014160156, 'learning_rate': 1.4202485903778976e-07, 'margin_dpo/margin_mean': 33.929237365722656, 'margin_dpo/margin_std': 23.769535064697266, 'logps/chosen': -75.74092864990234, 'logps/rejected': -143.4207763671875, 'logps/ref_chosen': -55.27437210083008, 'logps/ref_rejected': -89.02497863769531, 'logits/chosen': -0.6169182062149048, 'logits/rejected': -0.5887913703918457, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.929237365722656, 'margin_dpo/beta_margin_mean': 3.392923593521118, 'margin_dpo/beta_margin_std': 2.4047834873199463, 'margin_dpo/beta_margin_grad_mean': -0.12193028628826141, 'margin_dpo/beta_margin_grad_std': 0.19042545557022095, 'epoch': 0.68} + 68%|█████████████████████████████████████████████████████ | 463/681 [32:52<09:21, 2.57s/it] 68%|█████████████████████████████████████████████████████▏ | 464/681 [32:55<09:02, 2.50s/it] {'loss': 0.4531, 'grad_norm': 54.16157531738281, 'learning_rate': 1.4086882387355658e-07, 'margin_dpo/margin_mean': 34.593727111816406, 'margin_dpo/margin_std': 29.88116455078125, 'logps/chosen': -73.30712890625, 'logps/rejected': -159.47793579101562, 'logps/ref_chosen': -50.91230010986328, 'logps/ref_rejected': -102.4893798828125, 'logits/chosen': -0.6251201629638672, 'logits/rejected': -0.6308864951133728, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.593727111816406, 'margin_dpo/beta_margin_mean': 3.4593725204467773, 'margin_dpo/beta_margin_std': 2.9905498027801514, 'margin_dpo/beta_margin_grad_mean': -0.14893580973148346, 'margin_dpo/beta_margin_grad_std': 0.20893022418022156, 'epoch': 0.68} + 68%|█████████████████████████████████████████████████████▏ | 464/681 [32:55<09:02, 2.50s/it] 68%|█████████████████████████████████████████████████████▎ | 465/681 [32:57<09:15, 2.57s/it] {'loss': 0.2808, 'grad_norm': 50.176815032958984, 'learning_rate': 1.3971566441730714e-07, 'margin_dpo/margin_mean': 37.622222900390625, 'margin_dpo/margin_std': 25.363601684570312, 'logps/chosen': -81.1992416381836, 'logps/rejected': -172.650634765625, 'logps/ref_chosen': -60.116851806640625, 'logps/ref_rejected': -113.94602966308594, 'logits/chosen': -0.6043756008148193, 'logits/rejected': -0.5841087102890015, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.622222900390625, 'margin_dpo/beta_margin_mean': 3.7622225284576416, 'margin_dpo/beta_margin_std': 2.543063163757324, 'margin_dpo/beta_margin_grad_mean': -0.10672765225172043, 'margin_dpo/beta_margin_grad_std': 0.16949497163295746, 'epoch': 0.68} + 68%|█████████████████████████████████████████████████████▎ | 465/681 [32:57<09:15, 2.57s/it] 68%|█████████████████████████████████████████████████████▎ | 466/681 [33:00<09:42, 2.71s/it] {'loss': 0.3827, 'grad_norm': 57.16488265991211, 'learning_rate': 1.3856541105586545e-07, 'margin_dpo/margin_mean': 34.00209045410156, 'margin_dpo/margin_std': 23.383773803710938, 'logps/chosen': -75.47810363769531, 'logps/rejected': -146.87469482421875, 'logps/ref_chosen': -52.920921325683594, 'logps/ref_rejected': -90.3154296875, 'logits/chosen': -0.6066223382949829, 'logits/rejected': -0.5759164094924927, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.00209045410156, 'margin_dpo/beta_margin_mean': 3.4002089500427246, 'margin_dpo/beta_margin_std': 2.3887243270874023, 'margin_dpo/beta_margin_grad_mean': -0.12489843368530273, 'margin_dpo/beta_margin_grad_std': 0.20490986108779907, 'epoch': 0.68} + 68%|█████████████████████████████████████████████████████▎ | 466/681 [33:00<09:42, 2.71s/it] 69%|█████████████████████████████████████████████████████▍ | 467/681 [33:03<09:36, 2.69s/it] {'loss': 0.3729, 'grad_norm': 46.38023376464844, 'learning_rate': 1.3741809409947729e-07, 'margin_dpo/margin_mean': 34.46977996826172, 'margin_dpo/margin_std': 27.862186431884766, 'logps/chosen': -101.92547607421875, 'logps/rejected': -160.5396270751953, 'logps/ref_chosen': -78.7158203125, 'logps/ref_rejected': -102.86019897460938, 'logits/chosen': -0.6104651689529419, 'logits/rejected': -0.5786043405532837, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.46977615356445, 'margin_dpo/beta_margin_mean': 3.4469778537750244, 'margin_dpo/beta_margin_std': 2.8059871196746826, 'margin_dpo/beta_margin_grad_mean': -0.1380797028541565, 'margin_dpo/beta_margin_grad_std': 0.1899155229330063, 'epoch': 0.69} + 69%|█████████████████████████████████████████████████████▍ | 467/681 [33:03<09:36, 2.69s/it] 69%|█████████████████████████████████████████████████████▌ | 468/681 [33:06<09:41, 2.73s/it] {'loss': 0.3886, 'grad_norm': 55.537410736083984, 'learning_rate': 1.362737437810114e-07, 'margin_dpo/margin_mean': 32.051353454589844, 'margin_dpo/margin_std': 26.76758575439453, 'logps/chosen': -89.64823913574219, 'logps/rejected': -152.7930450439453, 'logps/ref_chosen': -69.93536376953125, 'logps/ref_rejected': -101.02881622314453, 'logits/chosen': -0.6192047595977783, 'logits/rejected': -0.5922250747680664, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.051353454589844, 'margin_dpo/beta_margin_mean': 3.2051353454589844, 'margin_dpo/beta_margin_std': 2.6790554523468018, 'margin_dpo/beta_margin_grad_mean': -0.1464032083749771, 'margin_dpo/beta_margin_grad_std': 0.18942488729953766, 'epoch': 0.69} + 69%|█████████████████████████████████████████████████████▌ | 468/681 [33:06<09:41, 2.73s/it] 69%|█████████████████████████████████████████████████████▋ | 469/681 [33:09<09:41, 2.74s/it] {'loss': 0.4299, 'grad_norm': 57.15333938598633, 'learning_rate': 1.351323902551631e-07, 'margin_dpo/margin_mean': 33.17765426635742, 'margin_dpo/margin_std': 27.252918243408203, 'logps/chosen': -91.19867706298828, 'logps/rejected': -161.0380401611328, 'logps/ref_chosen': -68.12469482421875, 'logps/ref_rejected': -104.78640747070312, 'logits/chosen': -0.6003662347793579, 'logits/rejected': -0.5658551454544067, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.17765426635742, 'margin_dpo/beta_margin_mean': 3.317765474319458, 'margin_dpo/beta_margin_std': 2.7324230670928955, 'margin_dpo/beta_margin_grad_mean': -0.14955471456050873, 'margin_dpo/beta_margin_grad_std': 0.21639080345630646, 'epoch': 0.69} + 69%|█████████████████████████████████████████████████████▋ | 469/681 [33:09<09:41, 2.74s/it] 69%|█████████████████████████████████████████████████████▊ | 470/681 [33:11<09:32, 2.71s/it] {'loss': 0.2368, 'grad_norm': 41.5388298034668, 'learning_rate': 1.339940635976592e-07, 'margin_dpo/margin_mean': 38.34840393066406, 'margin_dpo/margin_std': 23.939483642578125, 'logps/chosen': -64.00105285644531, 'logps/rejected': -141.2603759765625, 'logps/ref_chosen': -43.79193115234375, 'logps/ref_rejected': -82.70285034179688, 'logits/chosen': -0.5871062278747559, 'logits/rejected': -0.5616201162338257, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 38.3484001159668, 'margin_dpo/beta_margin_mean': 3.8348402976989746, 'margin_dpo/beta_margin_std': 2.4011597633361816, 'margin_dpo/beta_margin_grad_mean': -0.09441064298152924, 'margin_dpo/beta_margin_grad_std': 0.15031108260154724, 'epoch': 0.69} + 69%|█████████████████████████████████████████████████████▊ | 470/681 [33:11<09:32, 2.71s/it] 69%|█████████████████████████████████████████████████████▉ | 471/681 [33:14<09:07, 2.61s/it] {'loss': 0.4208, 'grad_norm': 54.3143310546875, 'learning_rate': 1.3285879380446563e-07, 'margin_dpo/margin_mean': 31.3731689453125, 'margin_dpo/margin_std': 24.42245101928711, 'logps/chosen': -87.58413696289062, 'logps/rejected': -139.228271484375, 'logps/ref_chosen': -63.33952331542969, 'logps/ref_rejected': -83.61048126220703, 'logits/chosen': -0.5919795036315918, 'logits/rejected': -0.5648236870765686, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.373167037963867, 'margin_dpo/beta_margin_mean': 3.1373167037963867, 'margin_dpo/beta_margin_std': 2.457742929458618, 'margin_dpo/beta_margin_grad_mean': -0.1538044661283493, 'margin_dpo/beta_margin_grad_std': 0.2021295428276062, 'epoch': 0.69} + 69%|█████████████████████████████████████████████████████▉ | 471/681 [33:14<09:07, 2.61s/it] 69%|██████████████████████████████████████████████████████ | 472/681 [33:17<09:21, 2.69s/it] {'loss': 0.3109, 'grad_norm': 50.913185119628906, 'learning_rate': 1.317266107909975e-07, 'margin_dpo/margin_mean': 40.24461364746094, 'margin_dpo/margin_std': 33.13086700439453, 'logps/chosen': -104.90176391601562, 'logps/rejected': -178.68946838378906, 'logps/ref_chosen': -83.66609954833984, 'logps/ref_rejected': -117.20919799804688, 'logits/chosen': -0.6416307687759399, 'logits/rejected': -0.5852631330490112, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 40.2446174621582, 'margin_dpo/beta_margin_mean': 4.02446174621582, 'margin_dpo/beta_margin_std': 3.3216898441314697, 'margin_dpo/beta_margin_grad_mean': -0.11783776432275772, 'margin_dpo/beta_margin_grad_std': 0.1787194162607193, 'epoch': 0.69} + 69%|██████████████████████████████████████████████████████ | 472/681 [33:17<09:21, 2.69s/it] 69%|██████████████████████████████████████████████████████▏ | 473/681 [33:19<09:21, 2.70s/it] {'loss': 0.4899, 'grad_norm': 78.06228637695312, 'learning_rate': 1.3059754439133002e-07, 'margin_dpo/margin_mean': 28.15515899658203, 'margin_dpo/margin_std': 22.536598205566406, 'logps/chosen': -87.47222900390625, 'logps/rejected': -133.27700805664062, 'logps/ref_chosen': -63.49696731567383, 'logps/ref_rejected': -81.14657592773438, 'logits/chosen': -0.5605521202087402, 'logits/rejected': -0.5147773623466492, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.15515899658203, 'margin_dpo/beta_margin_mean': 2.8155159950256348, 'margin_dpo/beta_margin_std': 2.263782501220703, 'margin_dpo/beta_margin_grad_mean': -0.17109636962413788, 'margin_dpo/beta_margin_grad_std': 0.22186963260173798, 'epoch': 0.69} + 69%|██████████████████████████████████████████████████████▏ | 473/681 [33:19<09:21, 2.70s/it] 70%|██████████████████████████████████████████████████████▎ | 474/681 [33:22<09:20, 2.71s/it] {'loss': 0.4737, 'grad_norm': 60.656185150146484, 'learning_rate': 1.2947162435741277e-07, 'margin_dpo/margin_mean': 30.685691833496094, 'margin_dpo/margin_std': 25.531770706176758, 'logps/chosen': -76.55195617675781, 'logps/rejected': -144.70611572265625, 'logps/ref_chosen': -52.6119384765625, 'logps/ref_rejected': -90.08041381835938, 'logits/chosen': -0.5783928632736206, 'logits/rejected': -0.5659887790679932, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.68568992614746, 'margin_dpo/beta_margin_mean': 3.0685691833496094, 'margin_dpo/beta_margin_std': 2.5612540245056152, 'margin_dpo/beta_margin_grad_mean': -0.16651608049869537, 'margin_dpo/beta_margin_grad_std': 0.22569791972637177, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▎ | 474/681 [33:22<09:20, 2.71s/it] 70%|██████████████████████████████████████████████████████▍ | 475/681 [33:25<09:04, 2.64s/it] {'loss': 0.3844, 'grad_norm': 43.59295654296875, 'learning_rate': 1.2834888035828596e-07, 'margin_dpo/margin_mean': 34.429412841796875, 'margin_dpo/margin_std': 30.290939331054688, 'logps/chosen': -63.40654754638672, 'logps/rejected': -145.40371704101562, 'logps/ref_chosen': -42.49519348144531, 'logps/ref_rejected': -90.06295013427734, 'logits/chosen': -0.62577223777771, 'logits/rejected': -0.6225380897521973, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.429412841796875, 'margin_dpo/beta_margin_mean': 3.442941188812256, 'margin_dpo/beta_margin_std': 3.0452959537506104, 'margin_dpo/beta_margin_grad_mean': -0.1439659297466278, 'margin_dpo/beta_margin_grad_std': 0.19135436415672302, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▍ | 475/681 [33:25<09:04, 2.64s/it] 70%|██████████████████████████████████████████████████████▌ | 476/681 [33:27<09:09, 2.68s/it] {'loss': 0.5114, 'grad_norm': 69.43315124511719, 'learning_rate': 1.2722934197929802e-07, 'margin_dpo/margin_mean': 30.353801727294922, 'margin_dpo/margin_std': 26.741519927978516, 'logps/chosen': -64.73588562011719, 'logps/rejected': -125.85054016113281, 'logps/ref_chosen': -42.949378967285156, 'logps/ref_rejected': -73.71023559570312, 'logits/chosen': -0.5941322445869446, 'logits/rejected': -0.5612877607345581, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.353801727294922, 'margin_dpo/beta_margin_mean': 3.0353803634643555, 'margin_dpo/beta_margin_std': 2.6748228073120117, 'margin_dpo/beta_margin_grad_mean': -0.17846056818962097, 'margin_dpo/beta_margin_grad_std': 0.22891968488693237, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▌ | 476/681 [33:27<09:09, 2.68s/it] 70%|██████████████████████████████████████████████████████▋ | 477/681 [33:30<09:01, 2.65s/it] {'loss': 0.6021, 'grad_norm': 81.28004455566406, 'learning_rate': 1.2611303872132631e-07, 'margin_dpo/margin_mean': 31.857627868652344, 'margin_dpo/margin_std': 27.68490982055664, 'logps/chosen': -95.98014831542969, 'logps/rejected': -133.20254516601562, 'logps/ref_chosen': -70.77261352539062, 'logps/ref_rejected': -76.13737487792969, 'logits/chosen': -0.6341814994812012, 'logits/rejected': -0.5662086009979248, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.857629776000977, 'margin_dpo/beta_margin_mean': 3.185762882232666, 'margin_dpo/beta_margin_std': 2.7865076065063477, 'margin_dpo/beta_margin_grad_mean': -0.15644104778766632, 'margin_dpo/beta_margin_grad_std': 0.24381397664546967, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▋ | 477/681 [33:30<09:01, 2.65s/it] 70%|██████████████████████████████████████████████████████▋ | 478/681 [33:33<09:11, 2.72s/it] {'loss': 0.4001, 'grad_norm': 48.535404205322266, 'learning_rate': 1.2500000000000005e-07, 'margin_dpo/margin_mean': 34.497291564941406, 'margin_dpo/margin_std': 29.08106231689453, 'logps/chosen': -61.48149871826172, 'logps/rejected': -139.90025329589844, 'logps/ref_chosen': -41.440513610839844, 'logps/ref_rejected': -85.36196899414062, 'logits/chosen': -0.5645046234130859, 'logits/rejected': -0.5495598316192627, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.497291564941406, 'margin_dpo/beta_margin_mean': 3.4497292041778564, 'margin_dpo/beta_margin_std': 2.9467873573303223, 'margin_dpo/beta_margin_grad_mean': -0.14866864681243896, 'margin_dpo/beta_margin_grad_std': 0.18979746103286743, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▋ | 478/681 [33:33<09:11, 2.72s/it] 70%|██████████████████████████████████████████████████████▊ | 479/681 [33:35<09:01, 2.68s/it] {'loss': 0.4427, 'grad_norm': 56.268714904785156, 'learning_rate': 1.2389025514492456e-07, 'margin_dpo/margin_mean': 30.368532180786133, 'margin_dpo/margin_std': 22.058135986328125, 'logps/chosen': -79.05259704589844, 'logps/rejected': -150.62954711914062, 'logps/ref_chosen': -53.907920837402344, 'logps/ref_rejected': -95.1163330078125, 'logits/chosen': -0.558883786201477, 'logits/rejected': -0.5508110523223877, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.368532180786133, 'margin_dpo/beta_margin_mean': 3.036853313446045, 'margin_dpo/beta_margin_std': 2.2986533641815186, 'margin_dpo/beta_margin_grad_mean': -0.15836496651172638, 'margin_dpo/beta_margin_grad_std': 0.21574333310127258, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▊ | 479/681 [33:35<09:01, 2.68s/it] 70%|██████████████████████████████████████████████████████▉ | 480/681 [33:38<08:52, 2.65s/it] {'loss': 0.5051, 'grad_norm': 73.03453826904297, 'learning_rate': 1.227838333989088e-07, 'margin_dpo/margin_mean': 36.273189544677734, 'margin_dpo/margin_std': 31.943330764770508, 'logps/chosen': -84.984619140625, 'logps/rejected': -145.50759887695312, 'logps/ref_chosen': -58.682701110839844, 'logps/ref_rejected': -82.93248748779297, 'logits/chosen': -0.5816048979759216, 'logits/rejected': -0.523268461227417, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.273189544677734, 'margin_dpo/beta_margin_mean': 3.627319097518921, 'margin_dpo/beta_margin_std': 3.305205821990967, 'margin_dpo/beta_margin_grad_mean': -0.15481433272361755, 'margin_dpo/beta_margin_grad_std': 0.23069554567337036, 'epoch': 0.7} + 70%|██████████████████████████████████████████████████████▉ | 480/681 [33:38<08:52, 2.65s/it] 71%|███████████████████████████████████████████████████████ | 481/681 [33:40<08:44, 2.62s/it] {'loss': 0.4399, 'grad_norm': 53.513370513916016, 'learning_rate': 1.2168076391719489e-07, 'margin_dpo/margin_mean': 34.74099349975586, 'margin_dpo/margin_std': 26.750259399414062, 'logps/chosen': -79.90770721435547, 'logps/rejected': -152.1048583984375, 'logps/ref_chosen': -54.964271545410156, 'logps/ref_rejected': -92.42044067382812, 'logits/chosen': -0.6167398691177368, 'logits/rejected': -0.5804057121276855, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.74099349975586, 'margin_dpo/beta_margin_mean': 3.4740993976593018, 'margin_dpo/beta_margin_std': 2.713843822479248, 'margin_dpo/beta_margin_grad_mean': -0.14174267649650574, 'margin_dpo/beta_margin_grad_std': 0.2210419625043869, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████ | 481/681 [33:40<08:44, 2.62s/it] 71%|███████████████████████████████████████████████████████▏ | 482/681 [33:43<08:49, 2.66s/it] {'loss': 0.4309, 'grad_norm': 54.49075698852539, 'learning_rate': 1.2058107576668938e-07, 'margin_dpo/margin_mean': 30.073631286621094, 'margin_dpo/margin_std': 25.875329971313477, 'logps/chosen': -89.89315795898438, 'logps/rejected': -140.00283813476562, 'logps/ref_chosen': -67.55347442626953, 'logps/ref_rejected': -87.58953857421875, 'logits/chosen': -0.5958288908004761, 'logits/rejected': -0.5650321841239929, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.073631286621094, 'margin_dpo/beta_margin_mean': 3.0073630809783936, 'margin_dpo/beta_margin_std': 2.6090502738952637, 'margin_dpo/beta_margin_grad_mean': -0.1658599078655243, 'margin_dpo/beta_margin_grad_std': 0.1872914731502533, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████▏ | 482/681 [33:43<08:49, 2.66s/it] 71%|███████████████████████████████████████████████████████▎ | 483/681 [33:46<08:46, 2.66s/it] {'loss': 0.3968, 'grad_norm': 65.8294677734375, 'learning_rate': 1.194847979251979e-07, 'margin_dpo/margin_mean': 35.49970245361328, 'margin_dpo/margin_std': 27.264251708984375, 'logps/chosen': -88.70866394042969, 'logps/rejected': -156.66552734375, 'logps/ref_chosen': -63.32981872558594, 'logps/ref_rejected': -95.78697204589844, 'logits/chosen': -0.6282751560211182, 'logits/rejected': -0.5697331428527832, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.49970245361328, 'margin_dpo/beta_margin_mean': 3.5499703884124756, 'margin_dpo/beta_margin_std': 2.7894442081451416, 'margin_dpo/beta_margin_grad_mean': -0.13324548304080963, 'margin_dpo/beta_margin_grad_std': 0.21172069013118744, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████▎ | 483/681 [33:46<08:46, 2.66s/it] 71%|███████████████████████████████████████████████████████▍ | 484/681 [33:48<08:24, 2.56s/it] {'loss': 0.3678, 'grad_norm': 55.688453674316406, 'learning_rate': 1.1839195928066101e-07, 'margin_dpo/margin_mean': 35.594505310058594, 'margin_dpo/margin_std': 29.648942947387695, 'logps/chosen': -80.87345886230469, 'logps/rejected': -141.7012939453125, 'logps/ref_chosen': -59.13812255859375, 'logps/ref_rejected': -84.37144470214844, 'logits/chosen': -0.6670191287994385, 'logits/rejected': -0.6306544542312622, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.594505310058594, 'margin_dpo/beta_margin_mean': 3.559450626373291, 'margin_dpo/beta_margin_std': 3.0208792686462402, 'margin_dpo/beta_margin_grad_mean': -0.1434181034564972, 'margin_dpo/beta_margin_grad_std': 0.1813618689775467, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████▍ | 484/681 [33:48<08:24, 2.56s/it] 71%|███████████████████████████████████████████████████████▌ | 485/681 [33:51<08:20, 2.55s/it] {'loss': 0.4199, 'grad_norm': 52.00883483886719, 'learning_rate': 1.1730258863039347e-07, 'margin_dpo/margin_mean': 40.278114318847656, 'margin_dpo/margin_std': 32.148040771484375, 'logps/chosen': -77.70271301269531, 'logps/rejected': -162.49534606933594, 'logps/ref_chosen': -58.849571228027344, 'logps/ref_rejected': -103.36408996582031, 'logits/chosen': -0.5754466652870178, 'logits/rejected': -0.5443192720413208, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 40.27811813354492, 'margin_dpo/beta_margin_mean': 4.0278120040893555, 'margin_dpo/beta_margin_std': 3.2175581455230713, 'margin_dpo/beta_margin_grad_mean': -0.14156897366046906, 'margin_dpo/beta_margin_grad_std': 0.2183229923248291, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████▌ | 485/681 [33:51<08:20, 2.55s/it] 71%|███████████████████████████████████████████████████████▋ | 486/681 [33:53<07:54, 2.43s/it] {'loss': 0.424, 'grad_norm': 66.21233367919922, 'learning_rate': 1.1621671468032493e-07, 'margin_dpo/margin_mean': 38.945411682128906, 'margin_dpo/margin_std': 30.78309440612793, 'logps/chosen': -77.98770904541016, 'logps/rejected': -153.8128204345703, 'logps/ref_chosen': -55.25966262817383, 'logps/ref_rejected': -92.13936614990234, 'logits/chosen': -0.6356394290924072, 'logits/rejected': -0.5828511714935303, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 38.945411682128906, 'margin_dpo/beta_margin_mean': 3.8945412635803223, 'margin_dpo/beta_margin_std': 3.0784053802490234, 'margin_dpo/beta_margin_grad_mean': -0.14394216239452362, 'margin_dpo/beta_margin_grad_std': 0.21945635974407196, 'epoch': 0.71} + 71%|███████████████████████████████████████████████████████▋ | 486/681 [33:53<07:54, 2.43s/it] 72%|███████████████████████████████████████████████████████▊ | 487/681 [33:56<08:12, 2.54s/it] {'loss': 0.3256, 'grad_norm': 49.65977096557617, 'learning_rate': 1.1513436604424378e-07, 'margin_dpo/margin_mean': 37.19938278198242, 'margin_dpo/margin_std': 26.274166107177734, 'logps/chosen': -75.19181060791016, 'logps/rejected': -151.7467041015625, 'logps/ref_chosen': -53.06330871582031, 'logps/ref_rejected': -92.4188232421875, 'logits/chosen': -0.6391937732696533, 'logits/rejected': -0.604444682598114, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.19938278198242, 'margin_dpo/beta_margin_mean': 3.7199385166168213, 'margin_dpo/beta_margin_std': 2.6885433197021484, 'margin_dpo/beta_margin_grad_mean': -0.12563364207744598, 'margin_dpo/beta_margin_grad_std': 0.17673608660697937, 'epoch': 0.72} + 72%|███████████████████████████████████████████████████████▊ | 487/681 [33:56<08:12, 2.54s/it] 72%|███████████████████████████████████████████████████████▉ | 488/681 [33:58<08:17, 2.58s/it] {'loss': 0.2845, 'grad_norm': 32.75376510620117, 'learning_rate': 1.1405557124304335e-07, 'margin_dpo/margin_mean': 32.08653259277344, 'margin_dpo/margin_std': 21.324390411376953, 'logps/chosen': -72.79434204101562, 'logps/rejected': -136.65927124023438, 'logps/ref_chosen': -52.228153228759766, 'logps/ref_rejected': -84.00656127929688, 'logits/chosen': -0.5953603386878967, 'logits/rejected': -0.563835859298706, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.08653259277344, 'margin_dpo/beta_margin_mean': 3.208653450012207, 'margin_dpo/beta_margin_std': 2.13510799407959, 'margin_dpo/beta_margin_grad_mean': -0.11705981194972992, 'margin_dpo/beta_margin_grad_std': 0.1456899493932724, 'epoch': 0.72} + 72%|███████████████████████████████████████████████████████▉ | 488/681 [33:58<08:17, 2.58s/it] 72%|████████████████████████████████████████████████████████ | 489/681 [34:01<08:14, 2.58s/it] {'loss': 0.4441, 'grad_norm': 55.29383850097656, 'learning_rate': 1.1298035870396985e-07, 'margin_dpo/margin_mean': 31.778709411621094, 'margin_dpo/margin_std': 27.465885162353516, 'logps/chosen': -77.7701416015625, 'logps/rejected': -132.9573516845703, 'logps/ref_chosen': -55.989627838134766, 'logps/ref_rejected': -79.39813232421875, 'logits/chosen': -0.5945910215377808, 'logits/rejected': -0.5459895730018616, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.778711318969727, 'margin_dpo/beta_margin_mean': 3.1778712272644043, 'margin_dpo/beta_margin_std': 2.766324520111084, 'margin_dpo/beta_margin_grad_mean': -0.1625826209783554, 'margin_dpo/beta_margin_grad_std': 0.21051008999347687, 'epoch': 0.72} + 72%|████████████████████████████████████████████████████████ | 489/681 [34:01<08:14, 2.58s/it] 72%|████████████████████████████████████████████████████████ | 490/681 [34:04<08:33, 2.69s/it] {'loss': 0.573, 'grad_norm': 67.01080322265625, 'learning_rate': 1.1190875675987355e-07, 'margin_dpo/margin_mean': 31.28500747680664, 'margin_dpo/margin_std': 29.551124572753906, 'logps/chosen': -72.7847900390625, 'logps/rejected': -162.11245727539062, 'logps/ref_chosen': -52.36639404296875, 'logps/ref_rejected': -110.40904998779297, 'logits/chosen': -0.613182783126831, 'logits/rejected': -0.6027116775512695, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.28500747680664, 'margin_dpo/beta_margin_mean': 3.1285009384155273, 'margin_dpo/beta_margin_std': 2.9745311737060547, 'margin_dpo/beta_margin_grad_mean': -0.186043843626976, 'margin_dpo/beta_margin_grad_std': 0.23473787307739258, 'epoch': 0.72} + 72%|████████████████████████████████████████████████████████ | 490/681 [34:04<08:33, 2.69s/it] 72%|████████████████████████████████████████████████████████▏ | 491/681 [34:07<08:48, 2.78s/it] {'loss': 0.5801, 'grad_norm': 71.59069061279297, 'learning_rate': 1.1084079364846241e-07, 'margin_dpo/margin_mean': 28.079666137695312, 'margin_dpo/margin_std': 27.83734893798828, 'logps/chosen': -82.98500061035156, 'logps/rejected': -124.22119140625, 'logps/ref_chosen': -60.11626434326172, 'logps/ref_rejected': -73.27278900146484, 'logits/chosen': -0.5881800651550293, 'logits/rejected': -0.5435885190963745, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.079666137695312, 'margin_dpo/beta_margin_mean': 2.807966709136963, 'margin_dpo/beta_margin_std': 2.785522699356079, 'margin_dpo/beta_margin_grad_mean': -0.1894461065530777, 'margin_dpo/beta_margin_grad_std': 0.22930499911308289, 'epoch': 0.72} + 72%|████████████████████████████████████████████████████████▏ | 491/681 [34:07<08:48, 2.78s/it] 72%|████████████████████████████████████████████████████████▎ | 492/681 [34:10<08:58, 2.85s/it] {'loss': 0.9317, 'grad_norm': 109.9788589477539, 'learning_rate': 1.097764975115576e-07, 'margin_dpo/margin_mean': 26.099708557128906, 'margin_dpo/margin_std': 29.874317169189453, 'logps/chosen': -77.27084350585938, 'logps/rejected': -122.03599548339844, 'logps/ref_chosen': -53.99418258666992, 'logps/ref_rejected': -72.65962219238281, 'logits/chosen': -0.6198358535766602, 'logits/rejected': -0.5758175849914551, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 26.099708557128906, 'margin_dpo/beta_margin_mean': 2.609971046447754, 'margin_dpo/beta_margin_std': 3.011613368988037, 'margin_dpo/beta_margin_grad_mean': -0.2348533272743225, 'margin_dpo/beta_margin_grad_std': 0.31157541275024414, 'epoch': 0.72} + 72%|████████████████████████████████████████████████████████▎ | 492/681 [34:10<08:58, 2.85s/it] 72%|████████████████████████████████████████████████████████▍ | 493/681 [34:13<08:58, 2.86s/it] {'loss': 0.4661, 'grad_norm': 69.1717529296875, 'learning_rate': 1.0871589639435203e-07, 'margin_dpo/margin_mean': 32.91573715209961, 'margin_dpo/margin_std': 26.319190979003906, 'logps/chosen': -95.62208557128906, 'logps/rejected': -140.3636016845703, 'logps/ref_chosen': -75.49723815917969, 'logps/ref_rejected': -87.32301330566406, 'logits/chosen': -0.6741948127746582, 'logits/rejected': -0.6164962649345398, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.91573715209961, 'margin_dpo/beta_margin_mean': 3.2915735244750977, 'margin_dpo/beta_margin_std': 2.6520345211029053, 'margin_dpo/beta_margin_grad_mean': -0.15023410320281982, 'margin_dpo/beta_margin_grad_std': 0.22642172873020172, 'epoch': 0.72} + 72%|████████████████████████████████████████████████████████▍ | 493/681 [34:13<08:58, 2.86s/it] 73%|████████████████████████████████████████████████████████▌ | 494/681 [34:15<08:39, 2.78s/it] {'loss': 0.5108, 'grad_norm': 75.6004867553711, 'learning_rate': 1.0765901824467166e-07, 'margin_dpo/margin_mean': 35.46459197998047, 'margin_dpo/margin_std': 29.650789260864258, 'logps/chosen': -63.30023956298828, 'logps/rejected': -143.49691772460938, 'logps/ref_chosen': -41.35926818847656, 'logps/ref_rejected': -86.09136962890625, 'logits/chosen': -0.5462692379951477, 'logits/rejected': -0.5368998050689697, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.46459197998047, 'margin_dpo/beta_margin_mean': 3.546459197998047, 'margin_dpo/beta_margin_std': 3.0083138942718506, 'margin_dpo/beta_margin_grad_mean': -0.1606236845254898, 'margin_dpo/beta_margin_grad_std': 0.23982644081115723, 'epoch': 0.73} + 73%|████████████████████████████████████████████████████████▌ | 494/681 [34:15<08:39, 2.78s/it] 73%|████████████████████████████████████████████████████████▋ | 495/681 [34:18<08:28, 2.73s/it] {'loss': 0.5177, 'grad_norm': 67.52748107910156, 'learning_rate': 1.0660589091223854e-07, 'margin_dpo/margin_mean': 32.37921142578125, 'margin_dpo/margin_std': 27.550233840942383, 'logps/chosen': -84.92739868164062, 'logps/rejected': -145.19595336914062, 'logps/ref_chosen': -63.53507995605469, 'logps/ref_rejected': -91.42443084716797, 'logits/chosen': -0.6319386959075928, 'logits/rejected': -0.5911184549331665, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.37921142578125, 'margin_dpo/beta_margin_mean': 3.2379212379455566, 'margin_dpo/beta_margin_std': 2.7765204906463623, 'margin_dpo/beta_margin_grad_mean': -0.15922006964683533, 'margin_dpo/beta_margin_grad_std': 0.22867868840694427, 'epoch': 0.73} + 73%|████████████████████████████████████████████████████████▋ | 495/681 [34:18<08:28, 2.73s/it] 73%|████████████████████████████████████████████████████████▊ | 496/681 [34:21<08:22, 2.72s/it] {'loss': 0.5292, 'grad_norm': 64.98089599609375, 'learning_rate': 1.0555654214793722e-07, 'margin_dpo/margin_mean': 28.696502685546875, 'margin_dpo/margin_std': 25.838706970214844, 'logps/chosen': -96.5443115234375, 'logps/rejected': -136.9782257080078, 'logps/ref_chosen': -72.59192657470703, 'logps/ref_rejected': -84.32933807373047, 'logits/chosen': -0.6621850728988647, 'logits/rejected': -0.6073780655860901, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.696502685546875, 'margin_dpo/beta_margin_mean': 2.869650363922119, 'margin_dpo/beta_margin_std': 2.5869693756103516, 'margin_dpo/beta_margin_grad_mean': -0.17409659922122955, 'margin_dpo/beta_margin_grad_std': 0.22257588803768158, 'epoch': 0.73} + 73%|████████████████████████████████████████████████████████▊ | 496/681 [34:21<08:22, 2.72s/it] 73%|████████████████████████████████████████████████████████▉ | 497/681 [34:23<08:16, 2.70s/it] {'loss': 0.613, 'grad_norm': 77.44481658935547, 'learning_rate': 1.0451099960308374e-07, 'margin_dpo/margin_mean': 28.408655166625977, 'margin_dpo/margin_std': 27.508472442626953, 'logps/chosen': -83.82826232910156, 'logps/rejected': -129.9313201904297, 'logps/ref_chosen': -58.593971252441406, 'logps/ref_rejected': -76.28836822509766, 'logits/chosen': -0.6251211166381836, 'logits/rejected': -0.5778101682662964, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.408655166625977, 'margin_dpo/beta_margin_mean': 2.8408656120300293, 'margin_dpo/beta_margin_std': 2.808185577392578, 'margin_dpo/beta_margin_grad_mean': -0.20568180084228516, 'margin_dpo/beta_margin_grad_std': 0.23945844173431396, 'epoch': 0.73} + 73%|████████████████████████████████████████████████████████▉ | 497/681 [34:23<08:16, 2.70s/it] 73%|█████████████████████████████████████████████████████████ | 498/681 [34:26<08:18, 2.72s/it] {'loss': 0.5312, 'grad_norm': 67.77000427246094, 'learning_rate': 1.0346929082869641e-07, 'margin_dpo/margin_mean': 30.984760284423828, 'margin_dpo/margin_std': 27.886219024658203, 'logps/chosen': -95.3200912475586, 'logps/rejected': -139.05723571777344, 'logps/ref_chosen': -71.20565795898438, 'logps/ref_rejected': -83.95803833007812, 'logits/chosen': -0.6193152666091919, 'logits/rejected': -0.5879042148590088, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.984760284423828, 'margin_dpo/beta_margin_mean': 3.098475933074951, 'margin_dpo/beta_margin_std': 2.798676013946533, 'margin_dpo/beta_margin_grad_mean': -0.17250090837478638, 'margin_dpo/beta_margin_grad_std': 0.23728637397289276, 'epoch': 0.73} + 73%|█████████████████████████████████████████████████████████ | 498/681 [34:26<08:18, 2.72s/it] 73%|█████████████████████████████████████████████████████████▏ | 499/681 [34:29<08:06, 2.67s/it] {'loss': 0.6873, 'grad_norm': 82.45706939697266, 'learning_rate': 1.0243144327477013e-07, 'margin_dpo/margin_mean': 31.553926467895508, 'margin_dpo/margin_std': 30.37271499633789, 'logps/chosen': -74.16600036621094, 'logps/rejected': -155.54342651367188, 'logps/ref_chosen': -51.25519561767578, 'logps/ref_rejected': -101.07870483398438, 'logits/chosen': -0.6282952427864075, 'logits/rejected': -0.6203751564025879, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.553926467895508, 'margin_dpo/beta_margin_mean': 3.155392646789551, 'margin_dpo/beta_margin_std': 3.0621047019958496, 'margin_dpo/beta_margin_grad_mean': -0.179952934384346, 'margin_dpo/beta_margin_grad_std': 0.2602365016937256, 'epoch': 0.73} + 73%|█████████████████████████████████████████████████████████▏ | 499/681 [34:29<08:06, 2.67s/it] 73%|█████████████████████████████████████████████████████████▎ | 500/681 [34:31<07:52, 2.61s/it] {'loss': 0.3799, 'grad_norm': 45.40144729614258, 'learning_rate': 1.0139748428955333e-07, 'margin_dpo/margin_mean': 33.82952117919922, 'margin_dpo/margin_std': 29.12575340270996, 'logps/chosen': -82.48162841796875, 'logps/rejected': -153.21792602539062, 'logps/ref_chosen': -57.027442932128906, 'logps/ref_rejected': -93.93421173095703, 'logits/chosen': -0.6029895544052124, 'logits/rejected': -0.5873157382011414, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.82952117919922, 'margin_dpo/beta_margin_mean': 3.3829522132873535, 'margin_dpo/beta_margin_std': 2.9376726150512695, 'margin_dpo/beta_margin_grad_mean': -0.13710999488830566, 'margin_dpo/beta_margin_grad_std': 0.19010357558727264, 'epoch': 0.73} + 73%|█████████████████████████████████████████████████████████▎ | 500/681 [34:31<07:52, 2.61s/it][INFO|trainer.py:4307] 2026-04-17 22:01:02,011 >> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 22:01:02,011 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 22:01:02,011 >> Batch size = 8 + + 0%| | 0/73 [00:00> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 22:06:11,319 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 22:06:11,319 >> Batch size = 8 + + 0%| | 0/73 [00:00> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600 +[INFO|configuration_utils.py:419] 2026-04-17 22:07:07,760 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600/config.json +[INFO|configuration_utils.py:911] 2026-04-17 22:07:07,777 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-17 22:08:05,733 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-17 22:08:05,740 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-17 22:08:05,745 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-600/special_tokens_map.json +[INFO|trainer.py:4083] 2026-04-17 22:11:52,979 >> Deleting older checkpoint [/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-200] due to args.save_total_limit + 88%|██████████████████████████████████████████████████████████████████▏ | 601/681 [45:27<2:21:06, 105.84s/it] {'loss': 0.5251, 'grad_norm': 82.41748046875, 'learning_rate': 2.1301532877994742e-08, 'margin_dpo/margin_mean': 34.12665939331055, 'margin_dpo/margin_std': 28.886859893798828, 'logps/chosen': -85.03898620605469, 'logps/rejected': -154.72296142578125, 'logps/ref_chosen': -59.13360595703125, 'logps/ref_rejected': -94.69093322753906, 'logits/chosen': -0.6234362125396729, 'logits/rejected': -0.595230758190155, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.12665939331055, 'margin_dpo/beta_margin_mean': 3.412665843963623, 'margin_dpo/beta_margin_std': 2.932499885559082, 'margin_dpo/beta_margin_grad_mean': -0.15890392661094666, 'margin_dpo/beta_margin_grad_std': 0.24525830149650574, 'epoch': 0.88} + 88%|██████████████████████████████████████████████████████████████████▏ | 601/681 [45:27<2:21:06, 105.84s/it] 88%|███████████████████████████████████████████████████████████████████▏ | 602/681 [45:29<1:38:31, 74.84s/it] {'loss': 0.3531, 'grad_norm': 68.01284790039062, 'learning_rate': 2.0786184285784298e-08, 'margin_dpo/margin_mean': 37.70860290527344, 'margin_dpo/margin_std': 27.594802856445312, 'logps/chosen': -66.78749084472656, 'logps/rejected': -143.57113647460938, 'logps/ref_chosen': -48.59352111816406, 'logps/ref_rejected': -87.6685562133789, 'logits/chosen': -0.6261130571365356, 'logits/rejected': -0.626197338104248, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.70860290527344, 'margin_dpo/beta_margin_mean': 3.770860195159912, 'margin_dpo/beta_margin_std': 2.8099164962768555, 'margin_dpo/beta_margin_grad_mean': -0.12587696313858032, 'margin_dpo/beta_margin_grad_std': 0.19796113669872284, 'epoch': 0.88} + 88%|███████████████████████████████████████████████████████████████████▏ | 602/681 [45:29<1:38:31, 74.84s/it] 89%|███████████████████████████████████████████████████████████████████▎ | 603/681 [45:32<1:09:03, 53.12s/it] {'loss': 0.4719, 'grad_norm': 65.49505615234375, 'learning_rate': 2.0276875690788204e-08, 'margin_dpo/margin_mean': 32.04313659667969, 'margin_dpo/margin_std': 26.230998992919922, 'logps/chosen': -90.7020263671875, 'logps/rejected': -152.65615844726562, 'logps/ref_chosen': -70.41461944580078, 'logps/ref_rejected': -100.32560729980469, 'logits/chosen': -0.637772262096405, 'logits/rejected': -0.5984662175178528, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.04313659667969, 'margin_dpo/beta_margin_mean': 3.2043137550354004, 'margin_dpo/beta_margin_std': 2.628369092941284, 'margin_dpo/beta_margin_grad_mean': -0.1628403216600418, 'margin_dpo/beta_margin_grad_std': 0.2238980382680893, 'epoch': 0.89} + 89%|███████████████████████████████████████████████████████████████████▎ | 603/681 [45:32<1:09:03, 53.12s/it] 89%|█████████████████████████████████████████████████████████████████████▏ | 604/681 [45:34<48:44, 37.98s/it] {'loss': 0.4607, 'grad_norm': 64.45735931396484, 'learning_rate': 1.977362051376158e-08, 'margin_dpo/margin_mean': 35.393802642822266, 'margin_dpo/margin_std': 29.511133193969727, 'logps/chosen': -65.24546813964844, 'logps/rejected': -146.03567504882812, 'logps/ref_chosen': -46.45808029174805, 'logps/ref_rejected': -91.8544921875, 'logits/chosen': -0.5643373727798462, 'logits/rejected': -0.5535662770271301, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.393802642822266, 'margin_dpo/beta_margin_mean': 3.5393803119659424, 'margin_dpo/beta_margin_std': 2.988723039627075, 'margin_dpo/beta_margin_grad_mean': -0.14535552263259888, 'margin_dpo/beta_margin_grad_std': 0.2219676375389099, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▏ | 604/681 [45:34<48:44, 37.98s/it] 89%|█████████████████████████████████████████████████████████████████████▎ | 605/681 [45:37<34:40, 27.38s/it] {'loss': 0.4569, 'grad_norm': 62.30309295654297, 'learning_rate': 1.9276432015946446e-08, 'margin_dpo/margin_mean': 31.68695831298828, 'margin_dpo/margin_std': 29.331512451171875, 'logps/chosen': -90.84162139892578, 'logps/rejected': -158.584228515625, 'logps/ref_chosen': -66.24933624267578, 'logps/ref_rejected': -102.30496978759766, 'logits/chosen': -0.6186962127685547, 'logits/rejected': -0.603484034538269, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.686960220336914, 'margin_dpo/beta_margin_mean': 3.168696165084839, 'margin_dpo/beta_margin_std': 3.0381813049316406, 'margin_dpo/beta_margin_grad_mean': -0.1487942636013031, 'margin_dpo/beta_margin_grad_std': 0.1991681158542633, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▎ | 605/681 [45:37<34:40, 27.38s/it] 89%|█████████████████████████████████████████████████████████████████████▍ | 606/681 [45:39<24:51, 19.89s/it] {'loss': 0.2954, 'grad_norm': 44.491546630859375, 'learning_rate': 1.8785323298722093e-08, 'margin_dpo/margin_mean': 36.6777458190918, 'margin_dpo/margin_std': 25.4649658203125, 'logps/chosen': -76.80615234375, 'logps/rejected': -157.0362548828125, 'logps/ref_chosen': -54.819122314453125, 'logps/ref_rejected': -98.37147521972656, 'logits/chosen': -0.5961008071899414, 'logits/rejected': -0.564789354801178, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.67774963378906, 'margin_dpo/beta_margin_mean': 3.6677749156951904, 'margin_dpo/beta_margin_std': 2.649290084838867, 'margin_dpo/beta_margin_grad_mean': -0.12030400335788727, 'margin_dpo/beta_margin_grad_std': 0.1559758484363556, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▍ | 606/681 [45:39<24:51, 19.89s/it] 89%|█████████████████████████████████████████████████████████████████████▌ | 607/681 [45:42<18:14, 14.79s/it] {'loss': 0.3387, 'grad_norm': 50.320865631103516, 'learning_rate': 1.8300307303259904e-08, 'margin_dpo/margin_mean': 32.44065856933594, 'margin_dpo/margin_std': 23.566665649414062, 'logps/chosen': -79.11578369140625, 'logps/rejected': -133.24951171875, 'logps/ref_chosen': -58.08403778076172, 'logps/ref_rejected': -79.777099609375, 'logits/chosen': -0.5963802337646484, 'logits/rejected': -0.5623406171798706, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.44065856933594, 'margin_dpo/beta_margin_mean': 3.244065999984741, 'margin_dpo/beta_margin_std': 2.3698348999023438, 'margin_dpo/beta_margin_grad_mean': -0.13021884858608246, 'margin_dpo/beta_margin_grad_std': 0.17474402487277985, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▌ | 607/681 [45:42<18:14, 14.79s/it] 89%|█████████████████████████████████████████████████████████████████████▋ | 608/681 [45:45<13:28, 11.07s/it] {'loss': 0.4835, 'grad_norm': 58.68054962158203, 'learning_rate': 1.7821396810182437e-08, 'margin_dpo/margin_mean': 33.237815856933594, 'margin_dpo/margin_std': 26.23067855834961, 'logps/chosen': -78.17123413085938, 'logps/rejected': -148.73159790039062, 'logps/ref_chosen': -57.450836181640625, 'logps/ref_rejected': -94.77339172363281, 'logits/chosen': -0.6197670698165894, 'logits/rejected': -0.5876868963241577, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.237815856933594, 'margin_dpo/beta_margin_mean': 3.323781728744507, 'margin_dpo/beta_margin_std': 2.665648937225342, 'margin_dpo/beta_margin_grad_mean': -0.15202751755714417, 'margin_dpo/beta_margin_grad_std': 0.22783887386322021, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▋ | 608/681 [45:45<13:28, 11.07s/it] 89%|█████████████████████████████████████████████████████████████████████▊ | 609/681 [45:47<10:07, 8.44s/it] {'loss': 0.3479, 'grad_norm': 64.48681640625, 'learning_rate': 1.7348604439226617e-08, 'margin_dpo/margin_mean': 33.63862228393555, 'margin_dpo/margin_std': 23.823345184326172, 'logps/chosen': -81.96639251708984, 'logps/rejected': -145.61566162109375, 'logps/ref_chosen': -58.805355072021484, 'logps/ref_rejected': -88.81600952148438, 'logits/chosen': -0.642276406288147, 'logits/rejected': -0.6062139272689819, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.63862228393555, 'margin_dpo/beta_margin_mean': 3.3638622760772705, 'margin_dpo/beta_margin_std': 2.390188694000244, 'margin_dpo/beta_margin_grad_mean': -0.12766654789447784, 'margin_dpo/beta_margin_grad_std': 0.18546564877033234, 'epoch': 0.89} + 89%|█████████████████████████████████████████████████████████████████████▊ | 609/681 [45:47<10:07, 8.44s/it] 90%|█████████████████████████████████████████████████████████████████████▊ | 610/681 [45:49<07:50, 6.63s/it] {'loss': 0.4533, 'grad_norm': 74.75220489501953, 'learning_rate': 1.6881942648911074e-08, 'margin_dpo/margin_mean': 32.680015563964844, 'margin_dpo/margin_std': 24.990657806396484, 'logps/chosen': -90.24623107910156, 'logps/rejected': -140.6365966796875, 'logps/ref_chosen': -65.69503784179688, 'logps/ref_rejected': -83.4053955078125, 'logits/chosen': -0.6093329191207886, 'logits/rejected': -0.5498037934303284, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.680015563964844, 'margin_dpo/beta_margin_mean': 3.2680017948150635, 'margin_dpo/beta_margin_std': 2.5850718021392822, 'margin_dpo/beta_margin_grad_mean': -0.15364539623260498, 'margin_dpo/beta_margin_grad_std': 0.21927115321159363, 'epoch': 0.9} + 90%|█████████████████████████████████████████████████████████████████████▊ | 610/681 [45:49<07:50, 6.63s/it] 90%|█████████████████████████████████████████████████████████████████████▉ | 611/681 [45:52<06:16, 5.38s/it] {'loss': 0.3916, 'grad_norm': 52.49784851074219, 'learning_rate': 1.6421423736208e-08, 'margin_dpo/margin_mean': 35.877716064453125, 'margin_dpo/margin_std': 27.9959774017334, 'logps/chosen': -74.63272094726562, 'logps/rejected': -144.24195861816406, 'logps/ref_chosen': -52.59947204589844, 'logps/ref_rejected': -86.33099365234375, 'logits/chosen': -0.6183408498764038, 'logits/rejected': -0.580921471118927, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.877716064453125, 'margin_dpo/beta_margin_mean': 3.587771415710449, 'margin_dpo/beta_margin_std': 2.8050875663757324, 'margin_dpo/beta_margin_grad_mean': -0.1446484923362732, 'margin_dpo/beta_margin_grad_std': 0.1988787204027176, 'epoch': 0.9} + 90%|█████████████████████████████████████████████████████████████████████▉ | 611/681 [45:52<06:16, 5.38s/it] 90%|██████████████████████████████████████████████████████████████████████ | 612/681 [45:55<05:15, 4.57s/it] {'loss': 0.2722, 'grad_norm': 44.11368179321289, 'learning_rate': 1.5967059836219042e-08, 'margin_dpo/margin_mean': 40.94614028930664, 'margin_dpo/margin_std': 27.57101058959961, 'logps/chosen': -80.20808410644531, 'logps/rejected': -150.14288330078125, 'logps/ref_chosen': -59.32372283935547, 'logps/ref_rejected': -88.31239318847656, 'logits/chosen': -0.6275376081466675, 'logits/rejected': -0.5670713782310486, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 40.94614028930664, 'margin_dpo/beta_margin_mean': 4.094614028930664, 'margin_dpo/beta_margin_std': 2.759153127670288, 'margin_dpo/beta_margin_grad_mean': -0.10046197474002838, 'margin_dpo/beta_margin_grad_std': 0.16786958277225494, 'epoch': 0.9} + 90%|██████████████████████████████████████████████████████████████████████ | 612/681 [45:55<05:15, 4.57s/it] 90%|██████████████████████████████████████████████████████████████████████▏ | 613/681 [45:57<04:29, 3.97s/it] {'loss': 0.3659, 'grad_norm': 51.012901306152344, 'learning_rate': 1.551886292185553e-08, 'margin_dpo/margin_mean': 35.77855682373047, 'margin_dpo/margin_std': 27.143339157104492, 'logps/chosen': -80.63017272949219, 'logps/rejected': -161.78628540039062, 'logps/ref_chosen': -59.72996520996094, 'logps/ref_rejected': -105.10753631591797, 'logits/chosen': -0.6327238082885742, 'logits/rejected': -0.6284672021865845, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.77855682373047, 'margin_dpo/beta_margin_mean': 3.5778555870056152, 'margin_dpo/beta_margin_std': 2.7456395626068115, 'margin_dpo/beta_margin_grad_mean': -0.13130351901054382, 'margin_dpo/beta_margin_grad_std': 0.2031707614660263, 'epoch': 0.9} + 90%|██████████████████████████████████████████████████████████████████████▏ | 613/681 [45:57<04:29, 3.97s/it] 90%|██████████████████████████████████████████████████████████████████████▎ | 614/681 [46:00<03:59, 3.57s/it] {'loss': 0.3031, 'grad_norm': 48.471588134765625, 'learning_rate': 1.507684480352292e-08, 'margin_dpo/margin_mean': 35.7594108581543, 'margin_dpo/margin_std': 25.549396514892578, 'logps/chosen': -76.522705078125, 'logps/rejected': -164.02252197265625, 'logps/ref_chosen': -52.93898010253906, 'logps/ref_rejected': -104.67938232421875, 'logits/chosen': -0.5755459070205688, 'logits/rejected': -0.5698869824409485, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.7594108581543, 'margin_dpo/beta_margin_mean': 3.5759410858154297, 'margin_dpo/beta_margin_std': 2.556734800338745, 'margin_dpo/beta_margin_grad_mean': -0.11619433760643005, 'margin_dpo/beta_margin_grad_std': 0.1717434674501419, 'epoch': 0.9} + 90%|██████████████████████████████████████████████████████████████████████▎ | 614/681 [46:00<03:59, 3.57s/it] 90%|██████████████████████████████████████████████████████████████████████▍ | 615/681 [46:02<03:36, 3.29s/it] {'loss': 0.4035, 'grad_norm': 41.79697799682617, 'learning_rate': 1.4641017128809801e-08, 'margin_dpo/margin_mean': 30.23415184020996, 'margin_dpo/margin_std': 22.966224670410156, 'logps/chosen': -86.97941589355469, 'logps/rejected': -146.57379150390625, 'logps/ref_chosen': -65.81727600097656, 'logps/ref_rejected': -95.17749786376953, 'logits/chosen': -0.5839822292327881, 'logits/rejected': -0.5518302917480469, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.234153747558594, 'margin_dpo/beta_margin_mean': 3.0234153270721436, 'margin_dpo/beta_margin_std': 2.3445184230804443, 'margin_dpo/beta_margin_grad_mean': -0.15672753751277924, 'margin_dpo/beta_margin_grad_std': 0.1847524344921112, 'epoch': 0.9} + 90%|██████████████████████████████████████████████████████████████████████▍ | 615/681 [46:02<03:36, 3.29s/it] 90%|██████████████████████████████████████████████████████████████████████▌ | 616/681 [46:05<03:20, 3.09s/it] {'loss': 0.5016, 'grad_norm': 72.02803039550781, 'learning_rate': 1.4211391382180637e-08, 'margin_dpo/margin_mean': 32.59221649169922, 'margin_dpo/margin_std': 29.501014709472656, 'logps/chosen': -88.5474853515625, 'logps/rejected': -130.7073516845703, 'logps/ref_chosen': -65.13285827636719, 'logps/ref_rejected': -74.70050048828125, 'logits/chosen': -0.613810122013092, 'logits/rejected': -0.5613222122192383, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.59221649169922, 'margin_dpo/beta_margin_mean': 3.2592217922210693, 'margin_dpo/beta_margin_std': 3.033946990966797, 'margin_dpo/beta_margin_grad_mean': -0.16017135977745056, 'margin_dpo/beta_margin_grad_std': 0.230714350938797, 'epoch': 0.9} + 90%|██████████████████████████████████████████████████████████████████████▌ | 616/681 [46:05<03:20, 3.09s/it] 91%|██████████████████████████████████████████████████████████████████████▋ | 617/681 [46:08<03:10, 2.97s/it] {'loss': 0.3826, 'grad_norm': 54.11730194091797, 'learning_rate': 1.378797888467345e-08, 'margin_dpo/margin_mean': 29.974029541015625, 'margin_dpo/margin_std': 23.434463500976562, 'logps/chosen': -87.65239715576172, 'logps/rejected': -118.85501098632812, 'logps/ref_chosen': -63.005550384521484, 'logps/ref_rejected': -64.234130859375, 'logits/chosen': -0.5736366510391235, 'logits/rejected': -0.5296716094017029, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.974029541015625, 'margin_dpo/beta_margin_mean': 2.9974029064178467, 'margin_dpo/beta_margin_std': 2.35481333732605, 'margin_dpo/beta_margin_grad_mean': -0.14828212559223175, 'margin_dpo/beta_margin_grad_std': 0.17800341546535492, 'epoch': 0.91} + 91%|██████████████████████████████████████████████████████████████████████▋ | 617/681 [46:08<03:10, 2.97s/it] 91%|██████████████████████████████████████████████████████████████████████▊ | 618/681 [46:10<03:03, 2.92s/it] {'loss': 0.4624, 'grad_norm': 67.99271392822266, 'learning_rate': 1.3370790793601371e-08, 'margin_dpo/margin_mean': 30.859840393066406, 'margin_dpo/margin_std': 26.370765686035156, 'logps/chosen': -90.81625366210938, 'logps/rejected': -146.72813415527344, 'logps/ref_chosen': -67.10135650634766, 'logps/ref_rejected': -92.15339660644531, 'logits/chosen': -0.6390504837036133, 'logits/rejected': -0.610953152179718, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.859838485717773, 'margin_dpo/beta_margin_mean': 3.085983991622925, 'margin_dpo/beta_margin_std': 2.6704392433166504, 'margin_dpo/beta_margin_grad_mean': -0.16537515819072723, 'margin_dpo/beta_margin_grad_std': 0.21399806439876556, 'epoch': 0.91} + 91%|██████████████████████████████████████████████████████████████████████▊ | 618/681 [46:10<03:03, 2.92s/it] 91%|██████████████████████████████████████████████████████████████████████▉ | 619/681 [46:13<02:55, 2.82s/it] {'loss': 0.4702, 'grad_norm': 55.240272521972656, 'learning_rate': 1.2959838102258535e-08, 'margin_dpo/margin_mean': 32.92374038696289, 'margin_dpo/margin_std': 29.776756286621094, 'logps/chosen': -79.01873779296875, 'logps/rejected': -149.14964294433594, 'logps/ref_chosen': -55.978233337402344, 'logps/ref_rejected': -93.1854019165039, 'logits/chosen': -0.5689994096755981, 'logits/rejected': -0.5356103777885437, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.92374038696289, 'margin_dpo/beta_margin_mean': 3.2923738956451416, 'margin_dpo/beta_margin_std': 3.01572847366333, 'margin_dpo/beta_margin_grad_mean': -0.16712483763694763, 'margin_dpo/beta_margin_grad_std': 0.21881355345249176, 'epoch': 0.91} + 91%|██████████████████████████████████████████████████████████████████████▉ | 619/681 [46:13<02:55, 2.82s/it] 91%|███████████████████████████████████████████████████████████████████████ | 620/681 [46:16<02:46, 2.73s/it] {'loss': 0.2579, 'grad_norm': 34.842933654785156, 'learning_rate': 1.2555131639630567e-08, 'margin_dpo/margin_mean': 35.61638259887695, 'margin_dpo/margin_std': 25.934829711914062, 'logps/chosen': -79.86566162109375, 'logps/rejected': -134.0952911376953, 'logps/ref_chosen': -59.79750061035156, 'logps/ref_rejected': -78.41075134277344, 'logits/chosen': -0.6340548396110535, 'logits/rejected': -0.5965070724487305, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.61638259887695, 'margin_dpo/beta_margin_mean': 3.561638355255127, 'margin_dpo/beta_margin_std': 2.65966534614563, 'margin_dpo/beta_margin_grad_mean': -0.10915657132863998, 'margin_dpo/beta_margin_grad_std': 0.13496600091457367, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████ | 620/681 [46:16<02:46, 2.73s/it] 91%|███████████████████████████████████████████████████████████████████████▏ | 621/681 [46:18<02:46, 2.77s/it] {'loss': 0.3092, 'grad_norm': 40.03609848022461, 'learning_rate': 1.2156682070109086e-08, 'margin_dpo/margin_mean': 36.26169967651367, 'margin_dpo/margin_std': 26.822023391723633, 'logps/chosen': -72.59913635253906, 'logps/rejected': -143.29660034179688, 'logps/ref_chosen': -53.933753967285156, 'logps/ref_rejected': -88.36952209472656, 'logits/chosen': -0.6073925495147705, 'logits/rejected': -0.5800847411155701, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.26169967651367, 'margin_dpo/beta_margin_mean': 3.6261699199676514, 'margin_dpo/beta_margin_std': 2.691709041595459, 'margin_dpo/beta_margin_grad_mean': -0.10899462550878525, 'margin_dpo/beta_margin_grad_std': 0.1772289127111435, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████▏ | 621/681 [46:18<02:46, 2.77s/it] 91%|███████████████████████████████████████████████████████████████████████▏ | 622/681 [46:21<02:43, 2.76s/it] {'loss': 0.3854, 'grad_norm': 48.39630889892578, 'learning_rate': 1.1764499893210878e-08, 'margin_dpo/margin_mean': 36.819705963134766, 'margin_dpo/margin_std': 28.56911277770996, 'logps/chosen': -82.65914916992188, 'logps/rejected': -144.71177673339844, 'logps/ref_chosen': -60.28582000732422, 'logps/ref_rejected': -85.51873779296875, 'logits/chosen': -0.5869364142417908, 'logits/rejected': -0.5320132970809937, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.819705963134766, 'margin_dpo/beta_margin_mean': 3.6819705963134766, 'margin_dpo/beta_margin_std': 2.8990466594696045, 'margin_dpo/beta_margin_grad_mean': -0.1378306895494461, 'margin_dpo/beta_margin_grad_std': 0.1983867883682251, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████▏ | 622/681 [46:21<02:43, 2.76s/it] 91%|███████████████████████████████████████████████████████████████████████▎ | 623/681 [46:23<02:29, 2.58s/it] {'loss': 0.5541, 'grad_norm': 73.08351135253906, 'learning_rate': 1.1378595443300998e-08, 'margin_dpo/margin_mean': 30.37273406982422, 'margin_dpo/margin_std': 28.88761329650879, 'logps/chosen': -88.72175598144531, 'logps/rejected': -140.02056884765625, 'logps/ref_chosen': -64.15696716308594, 'logps/ref_rejected': -85.08304595947266, 'logits/chosen': -0.6507315635681152, 'logits/rejected': -0.6161798238754272, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.37273597717285, 'margin_dpo/beta_margin_mean': 3.037273645401001, 'margin_dpo/beta_margin_std': 2.930415630340576, 'margin_dpo/beta_margin_grad_mean': -0.18612176179885864, 'margin_dpo/beta_margin_grad_std': 0.23363880813121796, 'epoch': 0.91} + 91%|███████████████████████████████████████████████████████████████████████▎ | 623/681 [46:23<02:29, 2.58s/it] 92%|███████████████████████████████████████████████████████████████████████▍ | 624/681 [46:26<02:33, 2.69s/it] {'loss': 0.4965, 'grad_norm': 71.18040466308594, 'learning_rate': 1.0998978889320582e-08, 'margin_dpo/margin_mean': 37.318939208984375, 'margin_dpo/margin_std': 27.622631072998047, 'logps/chosen': -94.83811950683594, 'logps/rejected': -157.37045288085938, 'logps/ref_chosen': -71.91862487792969, 'logps/ref_rejected': -97.13203430175781, 'logits/chosen': -0.6796859502792358, 'logits/rejected': -0.6095322966575623, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.31894302368164, 'margin_dpo/beta_margin_mean': 3.7318942546844482, 'margin_dpo/beta_margin_std': 2.772662878036499, 'margin_dpo/beta_margin_grad_mean': -0.14844343066215515, 'margin_dpo/beta_margin_grad_std': 0.25005990266799927, 'epoch': 0.92} + 92%|███████████████████████████████████████████████████████████████████████▍ | 624/681 [46:26<02:33, 2.69s/it] 92%|███████████████████████████████████████████████████████████████████████▌ | 625/681 [46:29<02:33, 2.74s/it] {'loss': 0.3591, 'grad_norm': 49.115333557128906, 'learning_rate': 1.0625660234518913e-08, 'margin_dpo/margin_mean': 35.52130889892578, 'margin_dpo/margin_std': 28.529512405395508, 'logps/chosen': -81.66363525390625, 'logps/rejected': -144.93325805664062, 'logps/ref_chosen': -58.342071533203125, 'logps/ref_rejected': -86.09038543701172, 'logits/chosen': -0.59247887134552, 'logits/rejected': -0.5529348850250244, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.52130889892578, 'margin_dpo/beta_margin_mean': 3.552130937576294, 'margin_dpo/beta_margin_std': 2.8531718254089355, 'margin_dpo/beta_margin_grad_mean': -0.13468137383460999, 'margin_dpo/beta_margin_grad_std': 0.18700142204761505, 'epoch': 0.92} + 92%|███████████████████████████████████████████████████████████████████████▌ | 625/681 [46:29<02:33, 2.74s/it] 92%|███████████████████████████████████████████████████████████████████████▋ | 626/681 [46:32<02:30, 2.74s/it] {'loss': 0.5253, 'grad_norm': 63.880088806152344, 'learning_rate': 1.0258649316189721e-08, 'margin_dpo/margin_mean': 30.09228515625, 'margin_dpo/margin_std': 28.44098472595215, 'logps/chosen': -98.9983139038086, 'logps/rejected': -153.16671752929688, 'logps/ref_chosen': -75.11260986328125, 'logps/ref_rejected': -99.18872833251953, 'logits/chosen': -0.5680443644523621, 'logits/rejected': -0.5336043834686279, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.09228515625, 'margin_dpo/beta_margin_mean': 3.009228467941284, 'margin_dpo/beta_margin_std': 2.89654541015625, 'margin_dpo/beta_margin_grad_mean': -0.18730950355529785, 'margin_dpo/beta_margin_grad_std': 0.21887990832328796, 'epoch': 0.92} + 92%|███████████████████████████████████████████████████████████████████████▋ | 626/681 [46:32<02:30, 2.74s/it] 92%|███████████████████████████████████████████████████████████████████████▊ | 627/681 [46:35<02:28, 2.74s/it] {'loss': 0.6048, 'grad_norm': 78.66019439697266, 'learning_rate': 9.897955805412e-09, 'margin_dpo/margin_mean': 33.95563507080078, 'margin_dpo/margin_std': 34.049468994140625, 'logps/chosen': -69.19841003417969, 'logps/rejected': -162.16537475585938, 'logps/ref_chosen': -47.74314880371094, 'logps/ref_rejected': -106.75448608398438, 'logits/chosen': -0.579108476638794, 'logits/rejected': -0.587154746055603, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.95563507080078, 'margin_dpo/beta_margin_mean': 3.3955633640289307, 'margin_dpo/beta_margin_std': 3.4090704917907715, 'margin_dpo/beta_margin_grad_mean': -0.18939301371574402, 'margin_dpo/beta_margin_grad_std': 0.2481917440891266, 'epoch': 0.92} + 92%|███████████████████████████████████████████████████████████████████████▊ | 627/681 [46:35<02:28, 2.74s/it] 92%|███████████████████████████████████████████████████████████████████████▉ | 628/681 [46:37<02:23, 2.70s/it] {'loss': 0.3001, 'grad_norm': 41.49999237060547, 'learning_rate': 9.543589206795238e-09, 'margin_dpo/margin_mean': 35.616455078125, 'margin_dpo/margin_std': 25.80486297607422, 'logps/chosen': -82.25130462646484, 'logps/rejected': -159.23948669433594, 'logps/ref_chosen': -60.182945251464844, 'logps/ref_rejected': -101.55467224121094, 'logits/chosen': -0.6199311017990112, 'logits/rejected': -0.6005183458328247, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.616455078125, 'margin_dpo/beta_margin_mean': 3.5616455078125, 'margin_dpo/beta_margin_std': 2.615797758102417, 'margin_dpo/beta_margin_grad_mean': -0.12019230425357819, 'margin_dpo/beta_margin_grad_std': 0.15883654356002808, 'epoch': 0.92} + 92%|███████████████████████████████████████████████████████████████████████▉ | 628/681 [46:37<02:23, 2.70s/it] 92%|████████████████████████████████████████████████████████████████████████ | 629/681 [46:40<02:20, 2.71s/it] {'loss': 0.4054, 'grad_norm': 62.908477783203125, 'learning_rate': 9.19555885822887e-09, 'margin_dpo/margin_mean': 31.810806274414062, 'margin_dpo/margin_std': 25.02639389038086, 'logps/chosen': -86.42594909667969, 'logps/rejected': -145.6768798828125, 'logps/ref_chosen': -64.21353912353516, 'logps/ref_rejected': -91.65367126464844, 'logits/chosen': -0.6567898392677307, 'logits/rejected': -0.6142420768737793, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.81080436706543, 'margin_dpo/beta_margin_mean': 3.1810803413391113, 'margin_dpo/beta_margin_std': 2.5537304878234863, 'margin_dpo/beta_margin_grad_mean': -0.1407935917377472, 'margin_dpo/beta_margin_grad_std': 0.19307489693164825, 'epoch': 0.92} + 92%|████████████████████████████████████████████████████████████████████████ | 629/681 [46:40<02:20, 2.71s/it] 93%|████████████████████████████████████████████████████████████████████████▏ | 630/681 [46:43<02:17, 2.69s/it] {'loss': 0.461, 'grad_norm': 60.66549301147461, 'learning_rate': 8.85387393063622e-09, 'margin_dpo/margin_mean': 30.402843475341797, 'margin_dpo/margin_std': 25.565155029296875, 'logps/chosen': -79.63174438476562, 'logps/rejected': -134.34188842773438, 'logps/ref_chosen': -59.29100036621094, 'logps/ref_rejected': -83.59829711914062, 'logits/chosen': -0.6706698536872864, 'logits/rejected': -0.6243743896484375, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.402841567993164, 'margin_dpo/beta_margin_mean': 3.0402843952178955, 'margin_dpo/beta_margin_std': 2.556612014770508, 'margin_dpo/beta_margin_grad_mean': -0.1645456999540329, 'margin_dpo/beta_margin_grad_std': 0.20726469159126282, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▏ | 630/681 [46:43<02:17, 2.69s/it] 93%|████████████████████████████████████████████████████████████████████████▎ | 631/681 [46:45<02:11, 2.62s/it] {'loss': 0.7356, 'grad_norm': 94.32537078857422, 'learning_rate': 8.518543427732949e-09, 'margin_dpo/margin_mean': 28.286869049072266, 'margin_dpo/margin_std': 29.41876220703125, 'logps/chosen': -83.84978485107422, 'logps/rejected': -133.63461303710938, 'logps/ref_chosen': -59.45360565185547, 'logps/ref_rejected': -80.95157623291016, 'logits/chosen': -0.6151013374328613, 'logits/rejected': -0.5717021822929382, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.286867141723633, 'margin_dpo/beta_margin_mean': 2.8286869525909424, 'margin_dpo/beta_margin_std': 2.959045886993408, 'margin_dpo/beta_margin_grad_mean': -0.1976294070482254, 'margin_dpo/beta_margin_grad_std': 0.26410892605781555, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▎ | 631/681 [46:45<02:11, 2.62s/it] 93%|████████████████████████████████████████████████████████████████████████▍ | 632/681 [46:47<02:03, 2.53s/it] {'loss': 0.7093, 'grad_norm': 86.42517852783203, 'learning_rate': 8.189576185789637e-09, 'margin_dpo/margin_mean': 32.566551208496094, 'margin_dpo/margin_std': 29.249189376831055, 'logps/chosen': -85.71180725097656, 'logps/rejected': -143.08697509765625, 'logps/ref_chosen': -61.35155487060547, 'logps/ref_rejected': -86.16017150878906, 'logits/chosen': -0.619070291519165, 'logits/rejected': -0.5838553309440613, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.566551208496094, 'margin_dpo/beta_margin_mean': 3.256655216217041, 'margin_dpo/beta_margin_std': 3.002157211303711, 'margin_dpo/beta_margin_grad_mean': -0.16710862517356873, 'margin_dpo/beta_margin_grad_std': 0.26828470826148987, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▍ | 632/681 [46:47<02:03, 2.53s/it] 93%|████████████████████████████████████████████████████████████████████████▌ | 633/681 [46:50<02:01, 2.53s/it] {'loss': 0.5499, 'grad_norm': 60.10581970214844, 'learning_rate': 7.866980873399015e-09, 'margin_dpo/margin_mean': 27.419578552246094, 'margin_dpo/margin_std': 24.602121353149414, 'logps/chosen': -80.6368408203125, 'logps/rejected': -142.36219787597656, 'logps/ref_chosen': -57.278167724609375, 'logps/ref_rejected': -91.58395385742188, 'logits/chosen': -0.6361432075500488, 'logits/rejected': -0.6225095987319946, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.419578552246094, 'margin_dpo/beta_margin_mean': 2.741957902908325, 'margin_dpo/beta_margin_std': 2.554403066635132, 'margin_dpo/beta_margin_grad_mean': -0.19326050579547882, 'margin_dpo/beta_margin_grad_std': 0.22396619617938995, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▌ | 633/681 [46:50<02:01, 2.53s/it] 93%|████████████████████████████████████████████████████████████████████████▌ | 634/681 [46:53<02:00, 2.56s/it] {'loss': 0.6531, 'grad_norm': 73.96012878417969, 'learning_rate': 7.550765991247654e-09, 'margin_dpo/margin_mean': 27.910812377929688, 'margin_dpo/margin_std': 29.05972671508789, 'logps/chosen': -93.19425964355469, 'logps/rejected': -161.61175537109375, 'logps/ref_chosen': -66.61896514892578, 'logps/ref_rejected': -107.12565612792969, 'logits/chosen': -0.5560423135757446, 'logits/rejected': -0.538284420967102, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 27.910810470581055, 'margin_dpo/beta_margin_mean': 2.791081190109253, 'margin_dpo/beta_margin_std': 2.91212797164917, 'margin_dpo/beta_margin_grad_mean': -0.20842374861240387, 'margin_dpo/beta_margin_grad_std': 0.24925780296325684, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▌ | 634/681 [46:53<02:00, 2.56s/it] 93%|████████████████████████████████████████████████████████████████████████▋ | 635/681 [46:55<01:58, 2.58s/it] {'loss': 0.409, 'grad_norm': 50.861839294433594, 'learning_rate': 7.240939871891699e-09, 'margin_dpo/margin_mean': 28.63296127319336, 'margin_dpo/margin_std': 22.48883628845215, 'logps/chosen': -96.6619873046875, 'logps/rejected': -133.83990478515625, 'logps/ref_chosen': -73.95551300048828, 'logps/ref_rejected': -82.50045776367188, 'logits/chosen': -0.608803391456604, 'logits/rejected': -0.5592911243438721, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.63296127319336, 'margin_dpo/beta_margin_mean': 2.8632962703704834, 'margin_dpo/beta_margin_std': 2.2558207511901855, 'margin_dpo/beta_margin_grad_mean': -0.15432217717170715, 'margin_dpo/beta_margin_grad_std': 0.18803834915161133, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▋ | 635/681 [46:55<01:58, 2.58s/it] 93%|████████████████████████████████████████████████████████████████████████▊ | 636/681 [46:58<01:59, 2.66s/it] {'loss': 0.4012, 'grad_norm': 47.65840530395508, 'learning_rate': 6.937510679537628e-09, 'margin_dpo/margin_mean': 33.004608154296875, 'margin_dpo/margin_std': 23.834693908691406, 'logps/chosen': -82.30425262451172, 'logps/rejected': -137.65878295898438, 'logps/ref_chosen': -59.628910064697266, 'logps/ref_rejected': -81.97883605957031, 'logits/chosen': -0.5629330277442932, 'logits/rejected': -0.5346908569335938, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.00461196899414, 'margin_dpo/beta_margin_mean': 3.3004610538482666, 'margin_dpo/beta_margin_std': 2.40425968170166, 'margin_dpo/beta_margin_grad_mean': -0.13953568041324615, 'margin_dpo/beta_margin_grad_std': 0.21565653383731842, 'epoch': 0.93} + 93%|████████████████████████████████████████████████████████████████████████▊ | 636/681 [46:58<01:59, 2.66s/it] 94%|████████████████████████████████████████████████████████████████████████▉ | 637/681 [47:01<01:59, 2.73s/it] {'loss': 0.3634, 'grad_norm': 53.40937042236328, 'learning_rate': 6.640486409826785e-09, 'margin_dpo/margin_mean': 32.936134338378906, 'margin_dpo/margin_std': 25.349170684814453, 'logps/chosen': -73.21141815185547, 'logps/rejected': -154.89999389648438, 'logps/ref_chosen': -49.652687072753906, 'logps/ref_rejected': -98.40513610839844, 'logits/chosen': -0.5897486209869385, 'logits/rejected': -0.5671026110649109, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.936134338378906, 'margin_dpo/beta_margin_mean': 3.2936134338378906, 'margin_dpo/beta_margin_std': 2.5701606273651123, 'margin_dpo/beta_margin_grad_mean': -0.13438232243061066, 'margin_dpo/beta_margin_grad_std': 0.18966805934906006, 'epoch': 0.94} + 94%|████████████████████████████████████████████████████████████████████████▉ | 637/681 [47:01<01:59, 2.73s/it] 94%|█████████████████████████████████████████████████████████████████████████ | 638/681 [47:04<01:58, 2.75s/it] {'loss': 0.3245, 'grad_norm': 41.96897888183594, 'learning_rate': 6.349874889624962e-09, 'margin_dpo/margin_mean': 37.08156967163086, 'margin_dpo/margin_std': 27.137168884277344, 'logps/chosen': -78.70539855957031, 'logps/rejected': -136.9318084716797, 'logps/ref_chosen': -58.156646728515625, 'logps/ref_rejected': -79.3014907836914, 'logits/chosen': -0.5449614524841309, 'logits/rejected': -0.49521952867507935, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.08156967163086, 'margin_dpo/beta_margin_mean': 3.7081568241119385, 'margin_dpo/beta_margin_std': 2.8503551483154297, 'margin_dpo/beta_margin_grad_mean': -0.12071166932582855, 'margin_dpo/beta_margin_grad_std': 0.17367184162139893, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████ | 638/681 [47:04<01:58, 2.75s/it] 94%|█████████████████████████████████████████████████████████████████████████▏ | 639/681 [47:06<01:53, 2.70s/it] {'loss': 0.4397, 'grad_norm': 57.53899383544922, 'learning_rate': 6.065683776815933e-09, 'margin_dpo/margin_mean': 31.09198760986328, 'margin_dpo/margin_std': 24.611787796020508, 'logps/chosen': -97.73635864257812, 'logps/rejected': -130.7800750732422, 'logps/ref_chosen': -72.32319641113281, 'logps/ref_rejected': -74.2749252319336, 'logits/chosen': -0.58185875415802, 'logits/rejected': -0.5182079672813416, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.09198570251465, 'margin_dpo/beta_margin_mean': 3.109198570251465, 'margin_dpo/beta_margin_std': 2.4722304344177246, 'margin_dpo/beta_margin_grad_mean': -0.14476662874221802, 'margin_dpo/beta_margin_grad_std': 0.199687659740448, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████▏ | 639/681 [47:06<01:53, 2.70s/it] 94%|█████████████████████████████████████████████████████████████████████████▎ | 640/681 [47:09<01:50, 2.69s/it] {'loss': 0.3066, 'grad_norm': 44.61709213256836, 'learning_rate': 5.7879205600998296e-09, 'margin_dpo/margin_mean': 36.84351348876953, 'margin_dpo/margin_std': 29.767667770385742, 'logps/chosen': -78.43016815185547, 'logps/rejected': -167.73947143554688, 'logps/ref_chosen': -56.13436508178711, 'logps/ref_rejected': -108.60014343261719, 'logits/chosen': -0.5778528451919556, 'logits/rejected': -0.5412660241127014, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.84351348876953, 'margin_dpo/beta_margin_mean': 3.6843512058258057, 'margin_dpo/beta_margin_std': 3.017540454864502, 'margin_dpo/beta_margin_grad_mean': -0.12420199811458588, 'margin_dpo/beta_margin_grad_std': 0.15969912707805634, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████▎ | 640/681 [47:09<01:50, 2.69s/it] 94%|█████████████████████████████████████████████████████████████████████████▍ | 641/681 [47:12<01:46, 2.66s/it] {'loss': 0.3746, 'grad_norm': 51.515228271484375, 'learning_rate': 5.516592558795746e-09, 'margin_dpo/margin_mean': 32.17498779296875, 'margin_dpo/margin_std': 29.780851364135742, 'logps/chosen': -88.82362365722656, 'logps/rejected': -142.99404907226562, 'logps/ref_chosen': -64.99689483642578, 'logps/ref_rejected': -86.99232482910156, 'logits/chosen': -0.6603978872299194, 'logits/rejected': -0.6059365272521973, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.17498779296875, 'margin_dpo/beta_margin_mean': 3.217498779296875, 'margin_dpo/beta_margin_std': 3.0656890869140625, 'margin_dpo/beta_margin_grad_mean': -0.1466035395860672, 'margin_dpo/beta_margin_grad_std': 0.17220094799995422, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████▍ | 641/681 [47:12<01:46, 2.66s/it] 94%|█████████████████████████████████████████████████████████████████████████▌ | 642/681 [47:14<01:44, 2.68s/it] {'loss': 0.4822, 'grad_norm': 78.2542724609375, 'learning_rate': 5.251706922648868e-09, 'margin_dpo/margin_mean': 35.434226989746094, 'margin_dpo/margin_std': 30.440698623657227, 'logps/chosen': -90.29745483398438, 'logps/rejected': -170.28448486328125, 'logps/ref_chosen': -65.68924713134766, 'logps/ref_rejected': -110.24205017089844, 'logits/chosen': -0.5912165641784668, 'logits/rejected': -0.5562861561775208, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.43423080444336, 'margin_dpo/beta_margin_mean': 3.5434229373931885, 'margin_dpo/beta_margin_std': 3.0842573642730713, 'margin_dpo/beta_margin_grad_mean': -0.15366876125335693, 'margin_dpo/beta_margin_grad_std': 0.22277072072029114, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████▌ | 642/681 [47:14<01:44, 2.68s/it] 94%|█████████████████████████████████████████████████████████████████████████▋ | 643/681 [47:17<01:42, 2.69s/it] {'loss': 0.4257, 'grad_norm': 51.46054458618164, 'learning_rate': 4.993270631642038e-09, 'margin_dpo/margin_mean': 30.795534133911133, 'margin_dpo/margin_std': 24.044445037841797, 'logps/chosen': -71.25507354736328, 'logps/rejected': -137.56893920898438, 'logps/ref_chosen': -51.94999694824219, 'logps/ref_rejected': -87.46833801269531, 'logits/chosen': -0.6483656764030457, 'logits/rejected': -0.62122642993927, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.795534133911133, 'margin_dpo/beta_margin_mean': 3.0795533657073975, 'margin_dpo/beta_margin_std': 2.446993350982666, 'margin_dpo/beta_margin_grad_mean': -0.14452366530895233, 'margin_dpo/beta_margin_grad_std': 0.19863076508045197, 'epoch': 0.94} + 94%|█████████████████████████████████████████████████████████████████████████▋ | 643/681 [47:17<01:42, 2.69s/it] 95%|█████████████████████████████████████████████████████████████████████████▊ | 644/681 [47:20<01:38, 2.67s/it] {'loss': 0.5657, 'grad_norm': 75.44609069824219, 'learning_rate': 4.741290495811873e-09, 'margin_dpo/margin_mean': 30.231281280517578, 'margin_dpo/margin_std': 28.730857849121094, 'logps/chosen': -79.76002502441406, 'logps/rejected': -138.11033630371094, 'logps/ref_chosen': -59.017662048339844, 'logps/ref_rejected': -87.13668823242188, 'logits/chosen': -0.6009418964385986, 'logits/rejected': -0.57252037525177, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.231281280517578, 'margin_dpo/beta_margin_mean': 3.0231282711029053, 'margin_dpo/beta_margin_std': 2.8853414058685303, 'margin_dpo/beta_margin_grad_mean': -0.1847480684518814, 'margin_dpo/beta_margin_grad_std': 0.23598462343215942, 'epoch': 0.95} + 95%|█████████████████████████████████████████████████████████████████████████▊ | 644/681 [47:20<01:38, 2.67s/it] 95%|█████████████████████████████████████████████████████████████████████████▉ | 645/681 [47:22<01:35, 2.66s/it] {'loss': 0.544, 'grad_norm': 70.22451782226562, 'learning_rate': 4.495773155069299e-09, 'margin_dpo/margin_mean': 28.99817657470703, 'margin_dpo/margin_std': 27.904760360717773, 'logps/chosen': -79.71002197265625, 'logps/rejected': -150.6129913330078, 'logps/ref_chosen': -55.87602233886719, 'logps/ref_rejected': -97.78080749511719, 'logits/chosen': -0.5856224298477173, 'logits/rejected': -0.5652365684509277, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.99817657470703, 'margin_dpo/beta_margin_mean': 2.899817705154419, 'margin_dpo/beta_margin_std': 2.835707187652588, 'margin_dpo/beta_margin_grad_mean': -0.19084730744361877, 'margin_dpo/beta_margin_grad_std': 0.22894078493118286, 'epoch': 0.95} + 95%|█████████████████████████████████████████████████████████████████████████▉ | 645/681 [47:22<01:35, 2.66s/it] 95%|█████████████████████████████████████████████████████████████████████████▉ | 646/681 [47:25<01:30, 2.59s/it] {'loss': 0.316, 'grad_norm': 51.758888244628906, 'learning_rate': 4.256725079024553e-09, 'margin_dpo/margin_mean': 33.072837829589844, 'margin_dpo/margin_std': 22.390499114990234, 'logps/chosen': -83.82559967041016, 'logps/rejected': -133.1284637451172, 'logps/ref_chosen': -61.275787353515625, 'logps/ref_rejected': -77.50580596923828, 'logits/chosen': -0.6095120906829834, 'logits/rejected': -0.5594819784164429, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.072837829589844, 'margin_dpo/beta_margin_mean': 3.307283878326416, 'margin_dpo/beta_margin_std': 2.259632110595703, 'margin_dpo/beta_margin_grad_mean': -0.11909312754869461, 'margin_dpo/beta_margin_grad_std': 0.17143264412879944, 'epoch': 0.95} + 95%|█████████████████████████████████████████████████████████████████████████▉ | 646/681 [47:25<01:30, 2.59s/it] 95%|██████████████████████████████████████████████████████████████████████████ | 647/681 [47:28<01:31, 2.68s/it] {'loss': 0.5032, 'grad_norm': 81.30612182617188, 'learning_rate': 4.024152566816791e-09, 'margin_dpo/margin_mean': 32.761592864990234, 'margin_dpo/margin_std': 26.8262939453125, 'logps/chosen': -78.84927368164062, 'logps/rejected': -150.27786254882812, 'logps/ref_chosen': -54.852413177490234, 'logps/ref_rejected': -93.5194091796875, 'logits/chosen': -0.5496389865875244, 'logits/rejected': -0.5257160067558289, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.761592864990234, 'margin_dpo/beta_margin_mean': 3.2761595249176025, 'margin_dpo/beta_margin_std': 2.728703260421753, 'margin_dpo/beta_margin_grad_mean': -0.16137224435806274, 'margin_dpo/beta_margin_grad_std': 0.24134768545627594, 'epoch': 0.95} + 95%|██████████████████████████████████████████████████████████████████████████ | 647/681 [47:28<01:31, 2.68s/it] 95%|██████████████████████████████████████████████████████████████████████████▏ | 648/681 [47:30<01:26, 2.61s/it] {'loss': 0.3728, 'grad_norm': 47.32956314086914, 'learning_rate': 3.798061746947995e-09, 'margin_dpo/margin_mean': 40.340911865234375, 'margin_dpo/margin_std': 34.24688720703125, 'logps/chosen': -73.89356231689453, 'logps/rejected': -158.77578735351562, 'logps/ref_chosen': -54.17146682739258, 'logps/ref_rejected': -98.71279907226562, 'logits/chosen': -0.6139056086540222, 'logits/rejected': -0.6051241159439087, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 40.340911865234375, 'margin_dpo/beta_margin_mean': 4.034091472625732, 'margin_dpo/beta_margin_std': 3.443060874938965, 'margin_dpo/beta_margin_grad_mean': -0.138786181807518, 'margin_dpo/beta_margin_grad_std': 0.19550208747386932, 'epoch': 0.95} + 95%|██████████████████████████████████████████████████████████████████████████▏ | 648/681 [47:30<01:26, 2.61s/it] 95%|██████████████████████████████████████████████████████████████████████████▎ | 649/681 [47:33<01:25, 2.67s/it] {'loss': 0.536, 'grad_norm': 50.813629150390625, 'learning_rate': 3.5784585771215235e-09, 'margin_dpo/margin_mean': 28.49066925048828, 'margin_dpo/margin_std': 28.419557571411133, 'logps/chosen': -83.07283020019531, 'logps/rejected': -129.16033935546875, 'logps/ref_chosen': -62.4803466796875, 'logps/ref_rejected': -80.07717895507812, 'logits/chosen': -0.6515902876853943, 'logits/rejected': -0.6201357841491699, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 28.49066734313965, 'margin_dpo/beta_margin_mean': 2.849066734313965, 'margin_dpo/beta_margin_std': 2.8495798110961914, 'margin_dpo/beta_margin_grad_mean': -0.1962561458349228, 'margin_dpo/beta_margin_grad_std': 0.21189990639686584, 'epoch': 0.95} + 95%|██████████████████████████████████████████████████████████████████████████▎ | 649/681 [47:33<01:25, 2.67s/it] 95%|██████████████████████████████████████████████████████████████████████████▍ | 650/681 [47:35<01:22, 2.65s/it] {'loss': 0.3545, 'grad_norm': 59.442115783691406, 'learning_rate': 3.3653488440851253e-09, 'margin_dpo/margin_mean': 36.623985290527344, 'margin_dpo/margin_std': 28.712535858154297, 'logps/chosen': -80.34698486328125, 'logps/rejected': -159.14297485351562, 'logps/ref_chosen': -56.09281921386719, 'logps/ref_rejected': -98.26483917236328, 'logits/chosen': -0.5737979412078857, 'logits/rejected': -0.5637534260749817, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.62398147583008, 'margin_dpo/beta_margin_mean': 3.662398338317871, 'margin_dpo/beta_margin_std': 2.9485061168670654, 'margin_dpo/beta_margin_grad_mean': -0.13270466029644012, 'margin_dpo/beta_margin_grad_std': 0.1840543895959854, 'epoch': 0.95} + 95%|██████████████████████████████████████████████████████████████████████████▍ | 650/681 [47:35<01:22, 2.65s/it] 96%|██████████████████████████████████████████████████████████████████████████▌ | 651/681 [47:38<01:18, 2.63s/it] {'loss': 0.3146, 'grad_norm': 38.10145950317383, 'learning_rate': 3.158738163478475e-09, 'margin_dpo/margin_mean': 35.643516540527344, 'margin_dpo/margin_std': 26.896413803100586, 'logps/chosen': -62.947837829589844, 'logps/rejected': -155.12380981445312, 'logps/ref_chosen': -43.42544937133789, 'logps/ref_rejected': -99.9579086303711, 'logits/chosen': -0.653481125831604, 'logits/rejected': -0.6552349328994751, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.64352035522461, 'margin_dpo/beta_margin_mean': 3.564352035522461, 'margin_dpo/beta_margin_std': 2.6986141204833984, 'margin_dpo/beta_margin_grad_mean': -0.12548606097698212, 'margin_dpo/beta_margin_grad_std': 0.1639980524778366, 'epoch': 0.96} + 96%|██████████████████████████████████████████████████████████████████████████▌ | 651/681 [47:38<01:18, 2.63s/it] 96%|██████████████████████████████████████████████████████████████████████████▋ | 652/681 [47:41<01:15, 2.62s/it] {'loss': 0.3386, 'grad_norm': 39.05808639526367, 'learning_rate': 2.9586319796851555e-09, 'margin_dpo/margin_mean': 35.44752502441406, 'margin_dpo/margin_std': 28.104263305664062, 'logps/chosen': -78.93205261230469, 'logps/rejected': -163.570556640625, 'logps/ref_chosen': -62.57680892944336, 'logps/ref_rejected': -111.76779174804688, 'logits/chosen': -0.6412711143493652, 'logits/rejected': -0.617784857749939, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.44752502441406, 'margin_dpo/beta_margin_mean': 3.544752597808838, 'margin_dpo/beta_margin_std': 2.8201441764831543, 'margin_dpo/beta_margin_grad_mean': -0.13532721996307373, 'margin_dpo/beta_margin_grad_std': 0.16726936399936676, 'epoch': 0.96} + 96%|██████████████████████████████████████████████████████████████████████████▋ | 652/681 [47:41<01:15, 2.62s/it] 96%|██████████████████████████████████████████████████████████████████████████▊ | 653/681 [47:43<01:12, 2.60s/it] {'loss': 0.3249, 'grad_norm': 51.56984329223633, 'learning_rate': 2.7650355656892166e-09, 'margin_dpo/margin_mean': 35.66375732421875, 'margin_dpo/margin_std': 25.806888580322266, 'logps/chosen': -84.49002075195312, 'logps/rejected': -162.29043579101562, 'logps/ref_chosen': -61.11295700073242, 'logps/ref_rejected': -103.24960327148438, 'logits/chosen': -0.6192601919174194, 'logits/rejected': -0.5976792573928833, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.66375732421875, 'margin_dpo/beta_margin_mean': 3.566375970840454, 'margin_dpo/beta_margin_std': 2.590984344482422, 'margin_dpo/beta_margin_grad_mean': -0.12032375484704971, 'margin_dpo/beta_margin_grad_std': 0.18257243931293488, 'epoch': 0.96} + 96%|██████████████████████████████████████████████████████████████████████████▊ | 653/681 [47:43<01:12, 2.60s/it] 96%|██████████████████████████████████████████████████████████████████████████▉ | 654/681 [47:46<01:10, 2.60s/it] {'loss': 0.5285, 'grad_norm': 72.21066284179688, 'learning_rate': 2.577954022936174e-09, 'margin_dpo/margin_mean': 29.483509063720703, 'margin_dpo/margin_std': 28.753616333007812, 'logps/chosen': -86.98482513427734, 'logps/rejected': -153.51400756835938, 'logps/ref_chosen': -61.7281379699707, 'logps/ref_rejected': -98.7738037109375, 'logits/chosen': -0.6111325025558472, 'logits/rejected': -0.6062880754470825, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 29.48350715637207, 'margin_dpo/beta_margin_mean': 2.948350667953491, 'margin_dpo/beta_margin_std': 2.8891336917877197, 'margin_dpo/beta_margin_grad_mean': -0.17636682093143463, 'margin_dpo/beta_margin_grad_std': 0.2301536500453949, 'epoch': 0.96} + 96%|██████████████████████████████████████████████████████████████████████████▉ | 654/681 [47:46<01:10, 2.60s/it] 96%|███████████████████████████████████████████████████████████████████████████ | 655/681 [47:49<01:10, 2.70s/it] {'loss': 0.5089, 'grad_norm': 72.21392059326172, 'learning_rate': 2.397392281198729e-09, 'margin_dpo/margin_mean': 30.534870147705078, 'margin_dpo/margin_std': 29.086572647094727, 'logps/chosen': -70.99528503417969, 'logps/rejected': -150.2451629638672, 'logps/ref_chosen': -49.576812744140625, 'logps/ref_rejected': -98.29183197021484, 'logits/chosen': -0.6073825359344482, 'logits/rejected': -0.6081333160400391, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.53487205505371, 'margin_dpo/beta_margin_mean': 3.0534873008728027, 'margin_dpo/beta_margin_std': 2.986149311065674, 'margin_dpo/beta_margin_grad_mean': -0.18340007960796356, 'margin_dpo/beta_margin_grad_std': 0.217272087931633, 'epoch': 0.96} + 96%|███████████████████████████████████████████████████████████████████████████ | 655/681 [47:49<01:10, 2.70s/it] 96%|███████████████████████████████████████████████████████████████████████████▏ | 656/681 [47:51<01:07, 2.72s/it] {'loss': 0.2412, 'grad_norm': 40.71949768066406, 'learning_rate': 2.223355098446622e-09, 'margin_dpo/margin_mean': 41.93996047973633, 'margin_dpo/margin_std': 25.561412811279297, 'logps/chosen': -73.37840270996094, 'logps/rejected': -176.44357299804688, 'logps/ref_chosen': -52.54943084716797, 'logps/ref_rejected': -113.67464447021484, 'logits/chosen': -0.5212767124176025, 'logits/rejected': -0.5257933139801025, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 41.93996047973633, 'margin_dpo/beta_margin_mean': 4.193995952606201, 'margin_dpo/beta_margin_std': 2.617056369781494, 'margin_dpo/beta_margin_grad_mean': -0.0933179035782814, 'margin_dpo/beta_margin_grad_std': 0.15993143618106842, 'epoch': 0.96} + 96%|███████████████████████████████████████████████████████████████████████████▏ | 656/681 [47:51<01:07, 2.72s/it] 96%|███████████████████████████████████████████████████████████████████████████▎ | 657/681 [47:54<01:01, 2.58s/it] {'loss': 0.3432, 'grad_norm': 45.717838287353516, 'learning_rate': 2.055847060721566e-09, 'margin_dpo/margin_mean': 37.42141342163086, 'margin_dpo/margin_std': 28.687862396240234, 'logps/chosen': -68.62776184082031, 'logps/rejected': -157.26351928710938, 'logps/ref_chosen': -46.700538635253906, 'logps/ref_rejected': -97.91487121582031, 'logits/chosen': -0.6373677849769592, 'logits/rejected': -0.6168010234832764, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.42141342163086, 'margin_dpo/beta_margin_mean': 3.7421414852142334, 'margin_dpo/beta_margin_std': 2.8833959102630615, 'margin_dpo/beta_margin_grad_mean': -0.11700256913900375, 'margin_dpo/beta_margin_grad_std': 0.1847466230392456, 'epoch': 0.96} + 96%|███████████████████████████████████████████████████████████████████████████▎ | 657/681 [47:54<01:01, 2.58s/it] 97%|███████████████████████████████████████████████████████████████████████████▎ | 658/681 [47:56<00:58, 2.55s/it] {'loss': 0.4487, 'grad_norm': 59.321533203125, 'learning_rate': 1.8948725820160662e-09, 'margin_dpo/margin_mean': 35.119667053222656, 'margin_dpo/margin_std': 29.735076904296875, 'logps/chosen': -86.52423095703125, 'logps/rejected': -156.62518310546875, 'logps/ref_chosen': -60.958213806152344, 'logps/ref_rejected': -95.93949127197266, 'logits/chosen': -0.6310451030731201, 'logits/rejected': -0.5929208993911743, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.11966323852539, 'margin_dpo/beta_margin_mean': 3.5119664669036865, 'margin_dpo/beta_margin_std': 3.0297350883483887, 'margin_dpo/beta_margin_grad_mean': -0.14949670433998108, 'margin_dpo/beta_margin_grad_std': 0.2181350290775299, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▎ | 658/681 [47:56<00:58, 2.55s/it] 97%|███████████████████████████████████████████████████████████████████████████▍ | 659/681 [47:59<00:57, 2.60s/it] {'loss': 0.5047, 'grad_norm': 57.14666748046875, 'learning_rate': 1.7404359041573723e-09, 'margin_dpo/margin_mean': 34.30597686767578, 'margin_dpo/margin_std': 29.283281326293945, 'logps/chosen': -96.09359741210938, 'logps/rejected': -141.1275634765625, 'logps/ref_chosen': -76.74298095703125, 'logps/ref_rejected': -87.4709701538086, 'logits/chosen': -0.6056843996047974, 'logits/rejected': -0.540166974067688, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.305973052978516, 'margin_dpo/beta_margin_mean': 3.4305975437164307, 'margin_dpo/beta_margin_std': 2.9324827194213867, 'margin_dpo/beta_margin_grad_mean': -0.16867277026176453, 'margin_dpo/beta_margin_grad_std': 0.23597054183483124, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▍ | 659/681 [47:59<00:57, 2.60s/it] 97%|███████████████████████████████████████████████████████████████████████████▌ | 660/681 [48:02<00:55, 2.62s/it] {'loss': 0.2917, 'grad_norm': 49.01050567626953, 'learning_rate': 1.592541096695571e-09, 'margin_dpo/margin_mean': 37.83965301513672, 'margin_dpo/margin_std': 27.737031936645508, 'logps/chosen': -80.31892395019531, 'logps/rejected': -135.07073974609375, 'logps/ref_chosen': -59.047882080078125, 'logps/ref_rejected': -75.96005249023438, 'logits/chosen': -0.6273288130760193, 'logits/rejected': -0.5808557271957397, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.83964920043945, 'margin_dpo/beta_margin_mean': 3.7839651107788086, 'margin_dpo/beta_margin_std': 2.7794392108917236, 'margin_dpo/beta_margin_grad_mean': -0.1133999153971672, 'margin_dpo/beta_margin_grad_std': 0.1706753671169281, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▌ | 660/681 [48:02<00:55, 2.62s/it] 97%|███████████████████████████████████████████████████████████████████████████▋ | 661/681 [48:04<00:49, 2.50s/it] {'loss': 0.4523, 'grad_norm': 64.96249389648438, 'learning_rate': 1.4511920567963908e-09, 'margin_dpo/margin_mean': 34.966941833496094, 'margin_dpo/margin_std': 29.39708709716797, 'logps/chosen': -71.31771850585938, 'logps/rejected': -141.6163787841797, 'logps/ref_chosen': -50.673973083496094, 'logps/ref_rejected': -86.00569152832031, 'logits/chosen': -0.6019885540008545, 'logits/rejected': -0.5567299127578735, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.966941833496094, 'margin_dpo/beta_margin_mean': 3.4966940879821777, 'margin_dpo/beta_margin_std': 3.0865559577941895, 'margin_dpo/beta_margin_grad_mean': -0.14642944931983948, 'margin_dpo/beta_margin_grad_std': 0.2149476855993271, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▋ | 661/681 [48:04<00:49, 2.50s/it] 97%|███████████████████████████████████████████████████████████████████████████▊ | 662/681 [48:07<00:49, 2.59s/it] {'loss': 0.378, 'grad_norm': 50.99778747558594, 'learning_rate': 1.3163925091384532e-09, 'margin_dpo/margin_mean': 30.80561065673828, 'margin_dpo/margin_std': 25.51202964782715, 'logps/chosen': -93.49765014648438, 'logps/rejected': -144.09814453125, 'logps/ref_chosen': -69.26106262207031, 'logps/ref_rejected': -89.05593872070312, 'logits/chosen': -0.6079974174499512, 'logits/rejected': -0.5556979775428772, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 30.805612564086914, 'margin_dpo/beta_margin_mean': 3.080561399459839, 'margin_dpo/beta_margin_std': 2.571131944656372, 'margin_dpo/beta_margin_grad_mean': -0.14259321987628937, 'margin_dpo/beta_margin_grad_std': 0.17750491201877594, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▊ | 662/681 [48:07<00:49, 2.59s/it] 97%|███████████████████████████████████████████████████████████████████████████▉ | 663/681 [48:09<00:47, 2.64s/it] {'loss': 0.3262, 'grad_norm': 39.297733306884766, 'learning_rate': 1.1881460058152382e-09, 'margin_dpo/margin_mean': 33.046897888183594, 'margin_dpo/margin_std': 24.47772216796875, 'logps/chosen': -83.19400024414062, 'logps/rejected': -165.287353515625, 'logps/ref_chosen': -64.87891387939453, 'logps/ref_rejected': -113.92536926269531, 'logits/chosen': -0.6374907493591309, 'logits/rejected': -0.6157968044281006, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.04689407348633, 'margin_dpo/beta_margin_mean': 3.304689407348633, 'margin_dpo/beta_margin_std': 2.461198568344116, 'margin_dpo/beta_margin_grad_mean': -0.12396994978189468, 'margin_dpo/beta_margin_grad_std': 0.15943719446659088, 'epoch': 0.97} + 97%|███████████████████████████████████████████████████████████████████████████▉ | 663/681 [48:09<00:47, 2.64s/it] 98%|████████████████████████████████████████████████████████████████████████████ | 664/681 [48:12<00:44, 2.64s/it] {'loss': 0.4288, 'grad_norm': 69.85308074951172, 'learning_rate': 1.066455926241383e-09, 'margin_dpo/margin_mean': 37.14732360839844, 'margin_dpo/margin_std': 26.97930145263672, 'logps/chosen': -84.34225463867188, 'logps/rejected': -166.12283325195312, 'logps/ref_chosen': -60.88847351074219, 'logps/ref_rejected': -105.521728515625, 'logits/chosen': -0.5776158571243286, 'logits/rejected': -0.5483744144439697, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.14732360839844, 'margin_dpo/beta_margin_mean': 3.7147321701049805, 'margin_dpo/beta_margin_std': 2.736865997314453, 'margin_dpo/beta_margin_grad_mean': -0.11845803260803223, 'margin_dpo/beta_margin_grad_std': 0.1965658962726593, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████ | 664/681 [48:12<00:44, 2.64s/it] 98%|████████████████████████████████████████████████████████████████████████████▏ | 665/681 [48:14<00:41, 2.57s/it] {'loss': 0.3524, 'grad_norm': 44.827796936035156, 'learning_rate': 9.513254770636137e-10, 'margin_dpo/margin_mean': 31.53219223022461, 'margin_dpo/margin_std': 23.21342658996582, 'logps/chosen': -81.45133972167969, 'logps/rejected': -137.22821044921875, 'logps/ref_chosen': -60.56413269042969, 'logps/ref_rejected': -84.8088150024414, 'logits/chosen': -0.6395413279533386, 'logits/rejected': -0.5962468385696411, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.53219223022461, 'margin_dpo/beta_margin_mean': 3.153219223022461, 'margin_dpo/beta_margin_std': 2.3784492015838623, 'margin_dpo/beta_margin_grad_mean': -0.13737604022026062, 'margin_dpo/beta_margin_grad_std': 0.17597481608390808, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▏ | 665/681 [48:14<00:41, 2.57s/it] 98%|████████████████████████████████████████████████████████████████████████████▎ | 666/681 [48:17<00:39, 2.64s/it] {'loss': 0.4262, 'grad_norm': 61.68048858642578, 'learning_rate': 8.427576920763956e-10, 'margin_dpo/margin_mean': 35.28595733642578, 'margin_dpo/margin_std': 26.031997680664062, 'logps/chosen': -88.06729125976562, 'logps/rejected': -154.82492065429688, 'logps/ref_chosen': -64.41996002197266, 'logps/ref_rejected': -95.89163208007812, 'logits/chosen': -0.6096721887588501, 'logits/rejected': -0.5720229148864746, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.28595733642578, 'margin_dpo/beta_margin_mean': 3.5285956859588623, 'margin_dpo/beta_margin_std': 2.662677049636841, 'margin_dpo/beta_margin_grad_mean': -0.13251623511314392, 'margin_dpo/beta_margin_grad_std': 0.21360599994659424, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▎ | 666/681 [48:17<00:39, 2.64s/it] 98%|████████████████████████████████████████████████████████████████████████████▍ | 667/681 [48:20<00:38, 2.72s/it] {'loss': 0.3242, 'grad_norm': 58.16268539428711, 'learning_rate': 7.407554321417764e-10, 'margin_dpo/margin_mean': 34.412696838378906, 'margin_dpo/margin_std': 24.47201919555664, 'logps/chosen': -94.41732025146484, 'logps/rejected': -147.3884735107422, 'logps/ref_chosen': -69.27703094482422, 'logps/ref_rejected': -87.83549499511719, 'logits/chosen': -0.5887176990509033, 'logits/rejected': -0.536880612373352, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 34.412696838378906, 'margin_dpo/beta_margin_mean': 3.441269636154175, 'margin_dpo/beta_margin_std': 2.472762107849121, 'margin_dpo/beta_margin_grad_mean': -0.12313113361597061, 'margin_dpo/beta_margin_grad_std': 0.16970713436603546, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▍ | 667/681 [48:20<00:38, 2.72s/it] 98%|████████████████████████████████████████████████████████████████████████████▌ | 668/681 [48:23<00:35, 2.76s/it] {'loss': 0.4507, 'grad_norm': 69.86036682128906, 'learning_rate': 6.453213851142225e-10, 'margin_dpo/margin_mean': 33.146949768066406, 'margin_dpo/margin_std': 25.9494571685791, 'logps/chosen': -96.0662841796875, 'logps/rejected': -160.34828186035156, 'logps/ref_chosen': -72.60400390625, 'logps/ref_rejected': -103.73905181884766, 'logits/chosen': -0.6267153024673462, 'logits/rejected': -0.5883671641349792, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 33.146949768066406, 'margin_dpo/beta_margin_mean': 3.314695358276367, 'margin_dpo/beta_margin_std': 2.6596431732177734, 'margin_dpo/beta_margin_grad_mean': -0.1533161848783493, 'margin_dpo/beta_margin_grad_std': 0.22066539525985718, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▌ | 668/681 [48:23<00:35, 2.76s/it] 98%|████████████████████████████████████████████████████████████████████████████▋ | 669/681 [48:26<00:33, 2.76s/it] {'loss': 0.5021, 'grad_norm': 68.40164947509766, 'learning_rate': 5.564580657695939e-10, 'margin_dpo/margin_mean': 38.299198150634766, 'margin_dpo/margin_std': 32.602203369140625, 'logps/chosen': -65.71624755859375, 'logps/rejected': -135.82337951660156, 'logps/ref_chosen': -46.116416931152344, 'logps/ref_rejected': -77.92434692382812, 'logits/chosen': -0.6119288802146912, 'logits/rejected': -0.5665886998176575, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 38.299198150634766, 'margin_dpo/beta_margin_mean': 3.8299198150634766, 'margin_dpo/beta_margin_std': 3.285043716430664, 'margin_dpo/beta_margin_grad_mean': -0.15446214377880096, 'margin_dpo/beta_margin_grad_std': 0.23781202733516693, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▋ | 669/681 [48:26<00:33, 2.76s/it] 98%|████████████████████████████████████████████████████████████████████████████▋ | 670/681 [48:28<00:29, 2.73s/it] {'loss': 0.2702, 'grad_norm': 44.809444427490234, 'learning_rate': 4.741678157389739e-10, 'margin_dpo/margin_mean': 39.02031707763672, 'margin_dpo/margin_std': 25.866138458251953, 'logps/chosen': -83.17808532714844, 'logps/rejected': -156.79319763183594, 'logps/ref_chosen': -62.34575653076172, 'logps/ref_rejected': -96.9405517578125, 'logits/chosen': -0.5694983005523682, 'logits/rejected': -0.5347045660018921, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 39.020320892333984, 'margin_dpo/beta_margin_mean': 3.9020321369171143, 'margin_dpo/beta_margin_std': 2.6109135150909424, 'margin_dpo/beta_margin_grad_mean': -0.10760509222745895, 'margin_dpo/beta_margin_grad_std': 0.15676988661289215, 'epoch': 0.98} + 98%|████████████████████████████████████████████████████████████████████████████▋ | 670/681 [48:28<00:29, 2.73s/it] 99%|████████████████████████████████████████████████████████████████████████████▊ | 671/681 [48:31<00:26, 2.67s/it] {'loss': 0.3555, 'grad_norm': 48.393497467041016, 'learning_rate': 3.9845280344705245e-10, 'margin_dpo/margin_mean': 35.541099548339844, 'margin_dpo/margin_std': 28.457447052001953, 'logps/chosen': -72.3186264038086, 'logps/rejected': -143.67893981933594, 'logps/ref_chosen': -48.00010681152344, 'logps/ref_rejected': -83.81932067871094, 'logits/chosen': -0.5919187068939209, 'logits/rejected': -0.5590361952781677, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 35.54109573364258, 'margin_dpo/beta_margin_mean': 3.554109811782837, 'margin_dpo/beta_margin_std': 2.9027013778686523, 'margin_dpo/beta_margin_grad_mean': -0.13741746544837952, 'margin_dpo/beta_margin_grad_std': 0.1758362054824829, 'epoch': 0.99} + 99%|████████████████████████████████████████████████████████████████████████████▊ | 671/681 [48:31<00:26, 2.67s/it] 99%|████████████████████████████████████████████████████████████████████████████▉ | 672/681 [48:33<00:23, 2.64s/it] {'loss': 0.4842, 'grad_norm': 66.19140625, 'learning_rate': 3.293150240547549e-10, 'margin_dpo/margin_mean': 32.394371032714844, 'margin_dpo/margin_std': 29.72500228881836, 'logps/chosen': -82.76466369628906, 'logps/rejected': -149.71588134765625, 'logps/ref_chosen': -58.583290100097656, 'logps/ref_rejected': -93.14014434814453, 'logits/chosen': -0.6310614347457886, 'logits/rejected': -0.5937498211860657, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 32.394371032714844, 'margin_dpo/beta_margin_mean': 3.2394371032714844, 'margin_dpo/beta_margin_std': 3.0078792572021484, 'margin_dpo/beta_margin_grad_mean': -0.1728401631116867, 'margin_dpo/beta_margin_grad_std': 0.21923863887786865, 'epoch': 0.99} + 99%|████████████████████████████████████████████████████████████████████████████▉ | 672/681 [48:33<00:23, 2.64s/it] 99%|█████████████████████████████████████████████████████████████████████████████ | 673/681 [48:36<00:20, 2.55s/it] {'loss': 0.3112, 'grad_norm': 43.1835823059082, 'learning_rate': 2.6675629940689504e-10, 'margin_dpo/margin_mean': 37.077301025390625, 'margin_dpo/margin_std': 27.354259490966797, 'logps/chosen': -67.85647583007812, 'logps/rejected': -143.50682067871094, 'logps/ref_chosen': -46.72320556640625, 'logps/ref_rejected': -85.29623413085938, 'logits/chosen': -0.6048033237457275, 'logits/rejected': -0.5747998952865601, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.07730484008789, 'margin_dpo/beta_margin_mean': 3.707730531692505, 'margin_dpo/beta_margin_std': 2.7531516551971436, 'margin_dpo/beta_margin_grad_mean': -0.1224966049194336, 'margin_dpo/beta_margin_grad_std': 0.16890129446983337, 'epoch': 0.99} + 99%|█████████████████████████████████████████████████████████████████████████████ | 673/681 [48:36<00:20, 2.55s/it] 99%|█████████████████████████████████████████████████████████████████████████████▏| 674/681 [48:39<00:18, 2.60s/it] {'loss': 0.2851, 'grad_norm': 36.11240005493164, 'learning_rate': 2.1077827798404725e-10, 'margin_dpo/margin_mean': 37.874568939208984, 'margin_dpo/margin_std': 28.31113052368164, 'logps/chosen': -67.47659301757812, 'logps/rejected': -129.95156860351562, 'logps/ref_chosen': -45.445526123046875, 'logps/ref_rejected': -70.04593658447266, 'logits/chosen': -0.5830689668655396, 'logits/rejected': -0.5558980703353882, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.874568939208984, 'margin_dpo/beta_margin_mean': 3.78745698928833, 'margin_dpo/beta_margin_std': 2.833228826522827, 'margin_dpo/beta_margin_grad_mean': -0.11657389253377914, 'margin_dpo/beta_margin_grad_std': 0.1537243127822876, 'epoch': 0.99} + 99%|█████████████████████████████████████████████████████████████████████████████▏| 674/681 [48:39<00:18, 2.60s/it] 99%|█████████████████████████████████████████████████████████████████████████████▎| 675/681 [48:41<00:15, 2.58s/it] {'loss': 0.3902, 'grad_norm': 66.7328109741211, 'learning_rate': 1.6138243485910863e-10, 'margin_dpo/margin_mean': 39.495147705078125, 'margin_dpo/margin_std': 27.606351852416992, 'logps/chosen': -64.9262924194336, 'logps/rejected': -134.33714294433594, 'logps/ref_chosen': -44.17628479003906, 'logps/ref_rejected': -74.09197998046875, 'logits/chosen': -0.5768786668777466, 'logits/rejected': -0.5500950813293457, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 39.495147705078125, 'margin_dpo/beta_margin_mean': 3.949514865875244, 'margin_dpo/beta_margin_std': 2.772658109664917, 'margin_dpo/beta_margin_grad_mean': -0.11795066297054291, 'margin_dpo/beta_margin_grad_std': 0.21005932986736298, 'epoch': 0.99} + 99%|█████████████████████████████████████████████████████████████████████████████▎| 675/681 [48:41<00:15, 2.58s/it] 99%|█████████████████████████████████████████████████████████████████████████████▍| 676/681 [48:44<00:13, 2.64s/it] {'loss': 0.4162, 'grad_norm': 79.0772933959961, 'learning_rate': 1.1857007165852472e-10, 'margin_dpo/margin_mean': 36.29708480834961, 'margin_dpo/margin_std': 28.651344299316406, 'logps/chosen': -96.71508026123047, 'logps/rejected': -149.972412109375, 'logps/ref_chosen': -71.39852142333984, 'logps/ref_rejected': -88.3587646484375, 'logits/chosen': -0.615682065486908, 'logits/rejected': -0.5794901847839355, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.29708480834961, 'margin_dpo/beta_margin_mean': 3.629708766937256, 'margin_dpo/beta_margin_std': 2.896389961242676, 'margin_dpo/beta_margin_grad_mean': -0.1375354677438736, 'margin_dpo/beta_margin_grad_std': 0.20353099703788757, 'epoch': 0.99} + 99%|█████████████████████████████████████████████████████████████████████████████▍| 676/681 [48:44<00:13, 2.64s/it] 99%|█████████████████████████████████████████████████████████████████████████████▌| 677/681 [48:46<00:10, 2.55s/it] {'loss': 0.4482, 'grad_norm': 65.4261245727539, 'learning_rate': 8.23423165278725e-11, 'margin_dpo/margin_mean': 37.449485778808594, 'margin_dpo/margin_std': 28.472801208496094, 'logps/chosen': -79.63191986083984, 'logps/rejected': -138.780517578125, 'logps/ref_chosen': -56.52743911743164, 'logps/ref_rejected': -78.22654724121094, 'logits/chosen': -0.5974393486976624, 'logits/rejected': -0.5463284254074097, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 37.449485778808594, 'margin_dpo/beta_margin_mean': 3.744948625564575, 'margin_dpo/beta_margin_std': 2.872178792953491, 'margin_dpo/beta_margin_grad_mean': -0.13777747750282288, 'margin_dpo/beta_margin_grad_std': 0.22497375309467316, 'epoch': 0.99} + 99%|█████████████████████████████████████████████████████████████████████████████▌| 677/681 [48:46<00:10, 2.55s/it] 100%|█████████████████████████████████████████████████████████████████████████████▋| 678/681 [48:49<00:07, 2.51s/it] {'loss': 0.4485, 'grad_norm': 50.95887756347656, 'learning_rate': 5.270012410216185e-11, 'margin_dpo/margin_mean': 36.70520782470703, 'margin_dpo/margin_std': 31.16322135925293, 'logps/chosen': -67.8372802734375, 'logps/rejected': -139.0126495361328, 'logps/ref_chosen': -46.13447570800781, 'logps/ref_rejected': -80.60462951660156, 'logits/chosen': -0.5905472040176392, 'logits/rejected': -0.5667222738265991, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.7052116394043, 'margin_dpo/beta_margin_mean': 3.6705210208892822, 'margin_dpo/beta_margin_std': 3.1234190464019775, 'margin_dpo/beta_margin_grad_mean': -0.16534043848514557, 'margin_dpo/beta_margin_grad_std': 0.21265582740306854, 'epoch': 1.0} + 100%|█████████████████████████████████████████████████████████████████████████████▋| 678/681 [48:49<00:07, 2.51s/it] 100%|█████████████████████████████████████████████████████████████████████████████▊| 679/681 [48:51<00:05, 2.60s/it] {'loss': 0.3274, 'grad_norm': 47.8213005065918, 'learning_rate': 2.9644275480772416e-11, 'margin_dpo/margin_mean': 36.85191345214844, 'margin_dpo/margin_std': 26.87795639038086, 'logps/chosen': -72.65241241455078, 'logps/rejected': -135.8075408935547, 'logps/ref_chosen': -50.294921875, 'logps/ref_rejected': -76.59813690185547, 'logits/chosen': -0.6013349294662476, 'logits/rejected': -0.5681812167167664, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.85191345214844, 'margin_dpo/beta_margin_mean': 3.6851911544799805, 'margin_dpo/beta_margin_std': 2.695528745651245, 'margin_dpo/beta_margin_grad_mean': -0.11131599545478821, 'margin_dpo/beta_margin_grad_std': 0.1771748960018158, 'epoch': 1.0} + 100%|█████████████████████████████████████████████████████████████████████████████▊| 679/681 [48:51<00:05, 2.60s/it] 100%|█████████████████████████████████████████████████████████████████████████████▉| 680/681 [48:55<00:02, 2.75s/it] {'loss': 0.381, 'grad_norm': 57.492881774902344, 'learning_rate': 1.31753782067201e-11, 'margin_dpo/margin_mean': 36.17931365966797, 'margin_dpo/margin_std': 29.298704147338867, 'logps/chosen': -99.56130981445312, 'logps/rejected': -171.20968627929688, 'logps/ref_chosen': -76.91569519042969, 'logps/ref_rejected': -112.384765625, 'logits/chosen': -0.6063967347145081, 'logits/rejected': -0.5727298259735107, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 36.1793098449707, 'margin_dpo/beta_margin_mean': 3.6179311275482178, 'margin_dpo/beta_margin_std': 2.948613405227661, 'margin_dpo/beta_margin_grad_mean': -0.1346154808998108, 'margin_dpo/beta_margin_grad_std': 0.1998624950647354, 'epoch': 1.0} + 100%|█████████████████████████████████████████████████████████████████████████████▉| 680/681 [48:55<00:02, 2.75s/it] 100%|██████████████████████████████████████████████████████████████████████████████| 681/681 [48:57<00:00, 2.70s/it] {'loss': 0.4583, 'grad_norm': 52.16978073120117, 'learning_rate': 3.2938662507808745e-12, 'margin_dpo/margin_mean': 31.793434143066406, 'margin_dpo/margin_std': 28.037933349609375, 'logps/chosen': -84.20474243164062, 'logps/rejected': -143.598876953125, 'logps/ref_chosen': -60.957279205322266, 'logps/ref_rejected': -88.5579833984375, 'logits/chosen': -0.6496413946151733, 'logits/rejected': -0.6223350167274475, 'margin_dpo/beta': 0.10000000149011612, 'margin_dpo/loss_margin_mean': 31.793434143066406, 'margin_dpo/beta_margin_mean': 3.1793434619903564, 'margin_dpo/beta_margin_std': 2.862551212310791, 'margin_dpo/beta_margin_grad_mean': -0.16029776632785797, 'margin_dpo/beta_margin_grad_std': 0.20890314877033234, 'epoch': 1.0} + 100%|██████████████████████████████████████████████████████████████████████████████| 681/681 [48:57<00:00, 2.70s/it][INFO|trainer.py:3984] 2026-04-17 22:15:44,156 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681 +[INFO|configuration_utils.py:419] 2026-04-17 22:15:44,173 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681/config.json +[INFO|configuration_utils.py:911] 2026-04-17 22:15:44,189 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-17 22:16:50,024 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-17 22:16:50,066 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-17 22:16:50,129 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-681/special_tokens_map.json +[INFO|trainer.py:4083] 2026-04-17 22:20:52,552 >> Deleting older checkpoint [/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/checkpoint-400] due to args.save_total_limit +[INFO|trainer.py:2681] 2026-04-17 22:20:55,506 >> + +Training completed. Do not forget to share your model on huggingface.co/models =) + + + {'train_runtime': 3273.0613, 'train_samples_per_second': 13.32, 'train_steps_per_second': 0.208, 'train_loss': 0.5730435011495403, 'epoch': 1.0} + 100%|██████████████████████████████████████████████████████████████████████████████| 681/681 [54:25<00:00, 2.70s/it] 100%|██████████████████████████████████████████████████████████████████████████████| 681/681 [54:25<00:00, 4.79s/it] +***** train metrics ***** + epoch = 1.0 + total_flos = 0GF + train_loss = 0.573 + train_runtime = 0:54:33.06 + train_samples = 43598 + train_samples_per_second = 13.32 + train_steps_per_second = 0.208 +2026-04-17 22:20:55 - INFO - __main__ - *** Training complete *** +2026-04-17 22:20:55 - INFO - __main__ - *** Save model *** +[INFO|configuration_utils.py:419] 2026-04-17 22:21:14,696 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/config.json +[INFO|configuration_utils.py:911] 2026-04-17 22:21:14,701 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/generation_config.json +[INFO|modeling_utils.py:3580] 2026-04-17 22:22:17,207 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/model.safetensors.index.json. +[INFO|tokenization_utils_base.py:2510] 2026-04-17 22:22:17,351 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/tokenizer_config.json +[INFO|tokenization_utils_base.py:2519] 2026-04-17 22:22:17,546 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/special_tokens_map.json +2026-04-17 22:22:17 - INFO - __main__ - Saved HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 +[INFO|modelcard.py:450] 2026-04-17 22:22:18,393 >> Dropping the following result as it does not have all the necessary fields: +{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}} +[INFO|configuration_utils.py:419] 2026-04-17 22:22:18,543 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/config.json +2026-04-17 22:22:18 - INFO - __main__ - *** Evaluate *** +[INFO|trainer.py:4307] 2026-04-17 22:22:18,546 >> +***** Running Evaluation ***** +[INFO|trainer.py:4309] 2026-04-17 22:22:18,546 >> Num examples = 2339 +[INFO|trainer.py:4312] 2026-04-17 22:22:18,546 >> Batch size = 8 + 0%| | 0/73 [00:00