Files
adaptive-world-grpo-qwen2.5-3b/training_logs.json

1625 lines
20 KiB
JSON
Raw Normal View History

{
"training_steps": [
0,
0,
0,
0,
1,
1,
1,
1,
2,
2,
2,
2,
3,
3,
3,
3,
4,
4,
4,
4,
5,
5,
5,
5,
6,
6,
6,
6,
7,
7,
7,
7,
8,
8,
8,
8,
9,
9,
9,
9,
10,
10,
10,
10,
11,
11,
11,
11,
12,
12,
12,
12,
13,
13,
13,
13,
14,
14,
14,
14,
15,
15,
15,
15,
16,
16,
16,
16,
17,
17,
17,
17,
18,
18,
18,
18,
19,
19,
19,
19,
20,
20,
20,
20,
21,
21,
21,
21,
22,
22,
22,
22,
23,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
25,
26,
26,
26,
26,
27,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33,
34,
34,
34,
34,
35,
35,
35,
35,
36,
36,
36,
36,
37,
37,
37,
37,
38,
38,
38,
38,
39,
39,
39,
39,
40,
40,
40,
40,
41,
41,
41,
41,
42,
42,
42,
42,
43,
43,
43,
43,
44,
44,
44,
44,
45,
45,
45,
45,
46,
46,
46,
46,
47,
47,
47,
47,
48,
48,
48,
48,
49,
49,
49,
49,
50,
50,
50,
50,
51,
51,
51,
51,
52,
52,
52,
52,
53,
53,
53,
53,
54,
54,
54,
54,
55,
55,
55,
55,
56,
56,
56,
56,
57,
57,
57,
57,
58,
58,
58,
58,
59,
59,
59,
59,
60,
60,
60,
60,
61,
61,
61,
61,
62,
62,
62,
62,
63,
63,
63,
63,
64,
64,
64,
64,
65,
65,
65,
65,
66,
66,
66,
66,
67,
67,
67,
67,
68,
68,
68,
68,
69,
69,
69,
69,
70,
70,
70,
70,
71,
71,
71,
71,
72,
72,
72,
72,
73,
73,
73,
73,
74,
74,
74,
74,
75,
75,
75,
75,
76,
76,
76,
76,
77,
77,
77,
77,
78,
78,
78,
78,
79,
79,
79,
79,
80,
80,
80,
80,
81,
81,
81,
81,
82,
82,
82,
82,
83,
83,
83,
83,
84,
84,
84,
84,
85,
85,
85,
85,
86,
86,
86,
86,
87,
87,
87,
87,
88,
88,
88,
88,
89,
89,
89,
89,
90,
90,
90,
90,
91,
91,
91,
91,
92,
92,
92,
92,
93,
93,
93,
93,
94,
94,
94,
94,
95,
95,
95,
95,
96,
96,
96,
96,
97,
97,
97,
97,
98,
98,
98,
98,
99,
99,
99,
99
],
"task_rewards": [
0.373,
0.2524,
0.2524,
0.0484,
0.2644,
0.2644,
0.2644,
0.2644,
0.619,
0.2944,
0.619,
0.619,
0.619,
0.2944,
0.2944,
0.925,
0.4444,
0.2524,
0.4684,
0.2524,
0.619,
0.925,
0.619,
0.619,
0.2644,
0.2584,
0.2884,
0.2584,
0.2524,
0.2584,
0.2584,
0.0304,
0.6003999999999999,
0.2524,
0.2524,
0.2524,
0.2644,
0.2644,
0.2644,
0.2644,
0.4324,
0.2644,
0.2584,
0.2584,
0.27040000000000003,
0.45039999999999997,
0.27040000000000003,
0.27040000000000003,
0.2524,
0.27040000000000003,
0.2524,
0.4324,
0.2644,
0.2644,
0.2524,
0.2524,
0.0304,
0.0304,
0.2524,
0.2524,
0.4324,
0.2524,
0.2524,
0.2524,
0.2524,
0.0484,
0.2524,
0.2524,
0.2944,
0.619,
0.619,
0.6003999999999999,
0.4324,
0.2584,
0.577,
0.2524,
0.27040000000000003,
0.2524,
0.27040000000000003,
0.4324,
0.2584,
0.2524,
0.4324,
0.2584,
0.2584,
0.2644,
0.2644,
0.2644,
0.2584,
0.2644,
0.4324,
0.2644,
0.2524,
0.2524,
0.373,
0.2524,
0.2524,
0.4384,
0.2524,
0.4744,
0.4324,
0.2524,
0.2584,
0.7929999999999999,
0.2584,
0.2824,
0.2524,
0.583,
0.2584,
0.2584,
0.0484,
0.2644,
0.0484,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.2944,
0.2944,
0.2944,
0.2944,
0.619,
0.619,
0.619,
0.619,
0.27040000000000003,
0.27040000000000003,
0.45039999999999997,
0.27040000000000003,
0.769,
0.2524,
0.2824,
0.0484,
0.2644,
0.2524,
0.2524,
0.2524,
0.2644,
0.2584,
0.2524,
0.2584,
0.4744,
0.2524,
0.589,
0.589,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.589,
0.2644,
0.4324,
0.2524,
0.4324,
0.2524,
0.2644,
0.583,
0.4444,
0.2824,
0.2524,
0.2524,
0.2524,
0.4744,
0.2584,
0.2524,
0.925,
0.619,
0.2944,
0.2944,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.2644,
0.2524,
0.2524,
0.4444,
0.0484,
0.2524,
0.2524,
0.2524,
0.2524,
0.2524,
0.2524,
0.2524,
0.4324,
0.589,
0.4684,
0.4744,
0.45039999999999997,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.589,
0.2584,
0.2644,
0.4324,
0.2524,
0.2644,
0.2584,
0.4384,
0.2944,
0.2944,
0.2944,
0.2944,
0.2584,
0.4384,
0.5800000000000001,
0.4744,
0.4324,
0.2644,
0.2524,
0.2584,
0.2884,
0.802,
0.2524,
0.2524,
0.2884,
0.5920000000000001,
0.5920000000000001,
0.5860000000000001,
0.2524,
0.2584,
0.2524,
0.2584,
0.2944,
0.2944,
0.2944,
0.2944,
0.5920000000000001,
0.2644,
0.5920000000000001,
0.5920000000000001,
0.2524,
0.4324,
0.2524,
0.4684,
0.2644,
0.5920000000000001,
0.5920000000000001,
0.2644,
0.2644,
0.2644,
0.2524,
0.2644,
0.2584,
0.2584,
0.5860000000000001,
0.2584,
0.27040000000000003,
0.0484,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.2524,
0.5920000000000001,
0.2644,
0.2644,
0.4324,
0.2584,
0.2584,
0.2584,
0.2944,
0.2944,
0.6003999999999999,
0.2944,
0.2644,
0.4744,
0.4624,
0.2584,
0.5920000000000001,
0.5920000000000001,
0.2644,
0.5920000000000001,
0.5860000000000001,
0.2644,
0.5920000000000001,
0.5920000000000001,
0.2644,
0.2524,
0.2584,
0.4324,
0.5920000000000001,
0.2644,
0.2644,
0.2644,
0.2944,
0.2944,
0.2944,
0.6003999999999999,
0.0484,
0.2524,
0.0304,
0.2524,
0.2944,
0.6003999999999999,
0.2944,
0.2944,
0.0484,
0.0484,
0.2524,
0.2584,
0.2524,
0.45039999999999997,
0.2524,
0.2524,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.45039999999999997,
0.27040000000000003,
0.27040000000000003,
0.45039999999999997,
0.27040000000000003,
0.2644,
0.2644,
0.2644,
0.2584,
0.2524,
0.2524,
0.2584,
0.2584,
0.2524,
0.2524,
0.4324,
0.2524,
0.2524,
0.4324,
0.0484,
0.2524,
0.2584,
0.2644,
0.2644,
0.2644,
0.27040000000000003,
0.27040000000000003,
0.27040000000000003,
0.45039999999999997,
0.2944,
0.2944,
0.2944,
0.6003999999999999,
0.2944,
0.2944,
0.6003999999999999,
0.2944,
0.2524,
0.2524,
0.2524,
0.0484,
0.2524,
0.2524,
0.4324,
0.2524,
0.2524,
0.2524,
0.0484,
0.2524,
0.4444,
0.2524,
0.27040000000000003,
0.27040000000000003,
0.2944,
0.2944,
0.2944,
0.2944,
0.0484,
0.0484,
0.2524,
0.2524,
0.4324,
0.0484,
0.2644,
0.4624,
0.2524,
0.4684,
0.2584,
0.2524,
0.6003999999999999,
0.2944,
0.2944,
0.2944,
0.2944,
0.2944,
0.2944,
0.2944,
0.5860000000000001,
0.2524,
0.2524,
0.4324
],
"belief_accuracy": [
0.13332,
0.37332,
0.24,
0.0,
0.24,
0.0,
0.13332,
0.24,
0.31332000000000004,
0.18000000000000002,
0.25332,
0.25332,
0.37332,
0.18000000000000002,
0.24,
0.43332,
0.0,
0.13332,
0.0,
0.0,
0.25332,
0.49332,
0.37332,
0.37332,
0.25332,
0.24,
0.37332,
0.18000000000000002,
0.18000000000000002,
0.13332,
0.12,
0.0,
0.36,
0.18000000000000002,
0.37332,
0.24,
0.37332,
0.24,
0.24,
0.13332,
0.13332,
0.24,
0.0,
0.18000000000000002,
0.24,
0.43332,
0.24,
0.24,
0.18000000000000002,
0.37332,
0.18000000000000002,
0.0,
0.37332,
0.37332,
0.37332,
0.0,
0.13332,
0.13332,
0.13332,
0.24,
0.49332,
0.24,
0.24,
0.37332,
0.24,
0.0,
0.24,
0.24,
0.12,
0.25332,
0.25332,
0.3,
0.36,
0.31332000000000004,
0.0,
0.0,
0.18000000000000002,
0.25332,
0.18000000000000002,
0.36,
0.24,
0.24,
0.18000000000000002,
0.25332,
0.24,
0.24,
0.24,
0.0,
0.24,
0.18000000000000002,
0.43332,
0.24,
0.24,
0.37332,
0.13332,
0.24,
0.24,
0.49332,
0.24,
0.0,
0.0,
0.24,
0.24,
0.24,
0.37332,
0.0,
0.0,
0.37332,
0.36,
0.24,
0.0,
0.24,
0.0,
0.37332,
0.24,
0.37332,
0.18000000000000002,
0.18000000000000002,
0.24,
0.24,
0.37332,
0.25332,
0.37332,
0.31332000000000004,
0.24,
0.37332,
0.49332,
0.25332,
0.36,
0.24,
0.24,
0.0,
0.18000000000000002,
0.31332000000000004,
0.24,
0.37332,
0.31332000000000004,
0.37332,
0.13332,
0.31332000000000004,
0.18000000000000002,
0.0,
0.24,
0.24,
0.24,
0.24,
0.18000000000000002,
0.37332,
0.24,
0.37332,
0.36,
0.37332,
0.24,
0.0,
0.24,
0.49332,
0.49332,
0.24,
0.37332,
0.24,
0.24,
0.13332,
0.0,
0.0,
0.37332,
0.37332,
0.18000000000000002,
0.24,
0.24,
0.37332,
0.24,
0.24,
0.37332,
0.24,
0.0,
0.49332,
0.0,
0.18000000000000002,
0.24,
0.24,
0.31332000000000004,
0.24,
0.12,
0.24,
0.3,
0.18000000000000002,
0.0,
0.24,
0.24,
0.24,
0.24,
0.37332,
0.24,
0.24,
0.24,
0.37332,
0.31332000000000004,
0.24,
0.12,
0.24,
0.0,
0.24,
0.24,
0.36,
0.12,
0.24,
0.12,
0.24,
0.24,
0.36,
0.0,
0.0,
0.36,
0.24,
0.24,
0.24,
0.0,
0.24,
0.24,
0.0,
0.18000000000000002,
0.24,
0.18000000000000002,
0.18000000000000002,
0.24,
0.24,
0.0,
0.0,
0.18000000000000002,
0.24,
0.24,
0.24,
0.0,
0.24,
0.24,
0.24,
0.0,
0.0,
0.0,
0.24,
0.0,
0.24,
0.24,
0.0,
0.24,
0.0,
0.24,
0.24,
0.24,
0.24,
0.12,
0.24,
0.24,
0.0,
0.18000000000000002,
0.24,
0.24,
0.24,
0.24,
0.18000000000000002,
0.24,
0.18000000000000002,
0.24,
0.24,
0.36,
0.24,
0.0,
0.18000000000000002,
0.24,
0.24,
0.36,
0.18000000000000002,
0.24,
0.0,
0.18000000000000002,
0.24,
0.0,
0.0,
0.0,
0.24,
0.24,
0.0,
0.24,
0.24,
0.24,
0.24,
0.24,
0.0,
0.24,
0.0,
0.24,
0.0,
0.18000000000000002,
0.24,
0.24,
0.0,
0.0,
0.24,
0.0,
0.24,
0.18000000000000002,
0.36,
0.18000000000000002,
0.18000000000000002,
0.0,
0.0,
0.24,
0.18000000000000002,
0.18000000000000002,
0.36,
0.24,
0.18000000000000002,
0.24,
0.24,
0.12,
0.24,
0.24,
0.24,
0.24,
0.36,
0.24,
0.24,
0.36,
0.18000000000000002,
0.24,
0.0,
0.0,
0.24,
0.0,
0.0,
0.24,
0.12,
0.24,
0.24,
0.36,
0.24,
0.24,
0.18000000000000002,
0.0,
0.18000000000000002,
0.0,
0.0,
0.24,
0.24,
0.24,
0.24,
0.24,
0.36,
0.24,
0.24,
0.12,
0.36,
0.18000000000000002,
0.18000000000000002,
0.36,
0.12,
0.24,
0.24,
0.12,
0.0,
0.24,
0.24,
0.36,
0.12,
0.24,
0.24,
0.0,
0.18000000000000002,
0.36,
0.24,
0.24,
0.24,
0.24,
0.24,
0.24,
0.18000000000000002,
0.0,
0.0,
0.18000000000000002,
0.24,
0.0,
0.0,
0.24,
0.24,
0.0,
0.24,
0.24,
0.0,
0.36,
0.24,
0.24,
0.24,
0.18000000000000002,
0.24,
0.24,
0.24,
0.24,
0.0,
0.24,
0.18000000000000002
],
"combined_rewards": [
0.289112,
0.29472200000000004,
0.24806,
0.03146,
0.25586,
0.17186,
0.218522,
0.25586,
0.512012,
0.25436000000000003,
0.491012,
0.491012,
0.533012,
0.25436000000000003,
0.27536,
0.752912,
0.28886,
0.21072200000000002,
0.30446,
0.16406,
0.491012,
0.773912,
0.533012,
0.533012,
0.26052200000000003,
0.25196,
0.318122,
0.23096000000000003,
0.22706,
0.21462200000000003,
0.20996000000000004,
0.01976,
0.5162599999999999,
0.22706,
0.29472200000000004,
0.24806,
0.302522,
0.25586,
0.25586,
0.218522,
0.327722,
0.25586,
0.16796000000000003,
0.23096000000000003,
0.25976,
0.444422,
0.25976,
0.25976,
0.22706,
0.306422,
0.22706,
0.28106000000000003,
0.302522,
0.302522,
0.29472200000000004,
0.16406,
0.066422,
0.066422,
0.21072200000000002,
0.24806,
0.453722,
0.24806,
0.24806,
0.29472200000000004,
0.24806,
0.03146,
0.24806,
0.24806,
0.23336,
0.491012,
0.491012,
0.49526,
0.40706000000000003,
0.27762200000000004,
0.37505,
0.16406,
0.23876000000000003,
0.252722,
0.23876000000000003,
0.40706000000000003,
0.25196,
0.24806,
0.34406000000000003,
0.256622,
0.25196,
0.25586,
0.25586,
0.17186,
0.25196,
0.23486,
0.43272200000000005,
0.25586,
0.24806,
0.29472200000000004,
0.289112,
0.24806,
0.24806,
0.457622,
0.24806,
0.30836,
0.28106000000000003,
0.24806,
0.25196,
0.5994499999999999,
0.29862200000000005,
0.18356,
0.16406,
0.509612,
0.29396,
0.25196,
0.03146,
0.25586,
0.03146,
0.306422,
0.25976,
0.306422,
0.25436000000000003,
0.25436000000000003,
0.27536,
0.27536,
0.533012,
0.491012,
0.533012,
0.512012,
0.25976,
0.306422,
0.46542199999999995,
0.26442200000000005,
0.62585,
0.24806,
0.26756,
0.03146,
0.23486,
0.273722,
0.24806,
0.29472200000000004,
0.28152200000000005,
0.29862200000000005,
0.21072200000000002,
0.27762200000000004,
0.37136,
0.16406,
0.46685,
0.46685,
0.25976,
0.25976,
0.23876000000000003,
0.306422,
0.46685,
0.302522,
0.40706000000000003,
0.29472200000000004,
0.36506000000000005,
0.16406,
0.25586,
0.551612,
0.461522,
0.26756,
0.29472200000000004,
0.24806,
0.24806,
0.355022,
0.16796000000000003,
0.16406,
0.7319120000000001,
0.533012,
0.25436000000000003,
0.27536,
0.25976,
0.306422,
0.25976,
0.25976,
0.302522,
0.24806,
0.16406,
0.461522,
0.03146,
0.22706,
0.24806,
0.24806,
0.273722,
0.24806,
0.20606000000000002,
0.24806,
0.38606,
0.44584999999999997,
0.30446,
0.39236000000000004,
0.37676,
0.25976,
0.25976,
0.306422,
0.25976,
0.25976,
0.25976,
0.306422,
0.49251199999999995,
0.25196,
0.21386,
0.36506000000000005,
0.21406000000000003,
0.30585999999999997,
0.30196,
0.46096,
0.28336,
0.32536,
0.28336,
0.32536,
0.30196,
0.46096,
0.42700000000000005,
0.35836,
0.45706,
0.30585999999999997,
0.29806,
0.30196,
0.23746,
0.6553000000000001,
0.29806,
0.21406000000000003,
0.30046,
0.5188000000000001,
0.4978000000000001,
0.49390000000000006,
0.29806,
0.30196,
0.21406000000000003,
0.21796000000000004,
0.30436,
0.32536,
0.32536,
0.32536,
0.4348000000000001,
0.30585999999999997,
0.5188000000000001,
0.5188000000000001,
0.21406000000000003,
0.33106,
0.21406000000000003,
0.43846,
0.22186,
0.5188000000000001,
0.5188000000000001,
0.22186,
0.30585999999999997,
0.22186,
0.29806,
0.30585999999999997,
0.30196,
0.30196,
0.47290000000000004,
0.30196,
0.30976,
0.08146,
0.28876,
0.30976,
0.30976,
0.30976,
0.30976,
0.28876,
0.29806,
0.4978000000000001,
0.30585999999999997,
0.30585999999999997,
0.45706,
0.30196,
0.21796000000000004,
0.28096000000000004,
0.32536,
0.32536,
0.56626,
0.30436,
0.30585999999999997,
0.35836,
0.41356,
0.30196,
0.4348000000000001,
0.4348000000000001,
0.22186,
0.5188000000000001,
0.5149000000000001,
0.22186,
0.5188000000000001,
0.5188000000000001,
0.30585999999999997,
0.29806,
0.30196,
0.33106,
0.5188000000000001,
0.22186,
0.30585999999999997,
0.22186,
0.30436,
0.32536,
0.32536,
0.44026,
0.08146,
0.29806,
0.06976,
0.29806,
0.30436,
0.56626,
0.30436,
0.30436,
0.08146,
0.08146,
0.29806,
0.28096000000000004,
0.27706000000000003,
0.46875999999999995,
0.29806,
0.27706000000000003,
0.30976,
0.30976,
0.26776,
0.30976,
0.30976,
0.30976,
0.30976,
0.46875999999999995,
0.30976,
0.30976,
0.46875999999999995,
0.28876,
0.30585999999999997,
0.22186,
0.22186,
0.30196,
0.21406000000000003,
0.21406000000000003,
0.30196,
0.25996,
0.29806,
0.29806,
0.45706,
0.29806,
0.29806,
0.39406,
0.08146,
0.27706000000000003,
0.21796000000000004,
0.22186,
0.30585999999999997,
0.30585999999999997,
0.30976,
0.30976,
0.30976,
0.46875999999999995,
0.32536,
0.32536,
0.28336,
0.56626,
0.30436,
0.30436,
0.56626,
0.28336,
0.29806,
0.29806,
0.25606,
0.08146,
0.29806,
0.29806,
0.45706,
0.25606,
0.29806,
0.29806,
0.08146,
0.27706000000000003,
0.46486,
0.29806,
0.30976,
0.30976,
0.32536,
0.32536,
0.32536,
0.30436,
0.08146,
0.08146,
0.27706000000000003,
0.29806,
0.33106,
0.08146,
0.30585999999999997,
0.43456,
0.21406000000000003,
0.43846,
0.30196,
0.21406000000000003,
0.56626,
0.32536,
0.32536,
0.32536,
0.30436,
0.32536,
0.32536,
0.32536,
0.5149000000000001,
0.21406000000000003,
0.29806,
0.39406
],
"config": {
"pipeline": "SFT \u2192 GRPO 200 steps \u2192 DPO 50 steps",
"base_model": "ProthamD/adaptive-world-grpo-qwen2.5-3b",
"sft_examples": 420,
"curriculum": {
"easy": "0-60",
"medium": "60-130",
"hard": "130-200"
},
"reward_weights": {
"task": 0.7,
"belief": 0.3
},
"dpo_pairs": 7
}
}