{ "training_steps": [ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 69, 69, 69, 70, 70, 70, 70, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 82, 82, 82, 83, 83, 83, 83, 84, 84, 84, 84, 85, 85, 85, 85, 86, 86, 86, 86, 87, 87, 87, 87, 88, 88, 88, 88, 89, 89, 89, 89, 90, 90, 90, 90, 91, 91, 91, 91, 92, 92, 92, 92, 93, 93, 93, 93, 94, 94, 94, 94, 95, 95, 95, 95, 96, 96, 96, 96, 97, 97, 97, 97, 98, 98, 98, 98, 99, 99, 99, 99 ], "task_rewards": [ 0.373, 0.2524, 0.2524, 0.0484, 0.2644, 0.2644, 0.2644, 0.2644, 0.619, 0.2944, 0.619, 0.619, 0.619, 0.2944, 0.2944, 0.925, 0.4444, 0.2524, 0.4684, 0.2524, 0.619, 0.925, 0.619, 0.619, 0.2644, 0.2584, 0.2884, 0.2584, 0.2524, 0.2584, 0.2584, 0.0304, 0.6003999999999999, 0.2524, 0.2524, 0.2524, 0.2644, 0.2644, 0.2644, 0.2644, 0.4324, 0.2644, 0.2584, 0.2584, 0.27040000000000003, 0.45039999999999997, 0.27040000000000003, 0.27040000000000003, 0.2524, 0.27040000000000003, 0.2524, 0.4324, 0.2644, 0.2644, 0.2524, 0.2524, 0.0304, 0.0304, 0.2524, 0.2524, 0.4324, 0.2524, 0.2524, 0.2524, 0.2524, 0.0484, 0.2524, 0.2524, 0.2944, 0.619, 0.619, 0.6003999999999999, 0.4324, 0.2584, 0.577, 0.2524, 0.27040000000000003, 0.2524, 0.27040000000000003, 0.4324, 0.2584, 0.2524, 0.4324, 0.2584, 0.2584, 0.2644, 0.2644, 0.2644, 0.2584, 0.2644, 0.4324, 0.2644, 0.2524, 0.2524, 0.373, 0.2524, 0.2524, 0.4384, 0.2524, 0.4744, 0.4324, 0.2524, 0.2584, 0.7929999999999999, 0.2584, 0.2824, 0.2524, 0.583, 0.2584, 0.2584, 0.0484, 0.2644, 0.0484, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.2944, 0.2944, 0.2944, 0.2944, 0.619, 0.619, 0.619, 0.619, 0.27040000000000003, 0.27040000000000003, 0.45039999999999997, 0.27040000000000003, 0.769, 0.2524, 0.2824, 0.0484, 0.2644, 0.2524, 0.2524, 0.2524, 0.2644, 0.2584, 0.2524, 0.2584, 0.4744, 0.2524, 0.589, 0.589, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.589, 0.2644, 0.4324, 0.2524, 0.4324, 0.2524, 0.2644, 0.583, 0.4444, 0.2824, 0.2524, 0.2524, 0.2524, 0.4744, 0.2584, 0.2524, 0.925, 0.619, 0.2944, 0.2944, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.2644, 0.2524, 0.2524, 0.4444, 0.0484, 0.2524, 0.2524, 0.2524, 0.2524, 0.2524, 0.2524, 0.2524, 0.4324, 0.589, 0.4684, 0.4744, 0.45039999999999997, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.589, 0.2584, 0.2644, 0.4324, 0.2524, 0.2644, 0.2584, 0.4384, 0.2944, 0.2944, 0.2944, 0.2944, 0.2584, 0.4384, 0.5800000000000001, 0.4744, 0.4324, 0.2644, 0.2524, 0.2584, 0.2884, 0.802, 0.2524, 0.2524, 0.2884, 0.5920000000000001, 0.5920000000000001, 0.5860000000000001, 0.2524, 0.2584, 0.2524, 0.2584, 0.2944, 0.2944, 0.2944, 0.2944, 0.5920000000000001, 0.2644, 0.5920000000000001, 0.5920000000000001, 0.2524, 0.4324, 0.2524, 0.4684, 0.2644, 0.5920000000000001, 0.5920000000000001, 0.2644, 0.2644, 0.2644, 0.2524, 0.2644, 0.2584, 0.2584, 0.5860000000000001, 0.2584, 0.27040000000000003, 0.0484, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.2524, 0.5920000000000001, 0.2644, 0.2644, 0.4324, 0.2584, 0.2584, 0.2584, 0.2944, 0.2944, 0.6003999999999999, 0.2944, 0.2644, 0.4744, 0.4624, 0.2584, 0.5920000000000001, 0.5920000000000001, 0.2644, 0.5920000000000001, 0.5860000000000001, 0.2644, 0.5920000000000001, 0.5920000000000001, 0.2644, 0.2524, 0.2584, 0.4324, 0.5920000000000001, 0.2644, 0.2644, 0.2644, 0.2944, 0.2944, 0.2944, 0.6003999999999999, 0.0484, 0.2524, 0.0304, 0.2524, 0.2944, 0.6003999999999999, 0.2944, 0.2944, 0.0484, 0.0484, 0.2524, 0.2584, 0.2524, 0.45039999999999997, 0.2524, 0.2524, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.45039999999999997, 0.27040000000000003, 0.27040000000000003, 0.45039999999999997, 0.27040000000000003, 0.2644, 0.2644, 0.2644, 0.2584, 0.2524, 0.2524, 0.2584, 0.2584, 0.2524, 0.2524, 0.4324, 0.2524, 0.2524, 0.4324, 0.0484, 0.2524, 0.2584, 0.2644, 0.2644, 0.2644, 0.27040000000000003, 0.27040000000000003, 0.27040000000000003, 0.45039999999999997, 0.2944, 0.2944, 0.2944, 0.6003999999999999, 0.2944, 0.2944, 0.6003999999999999, 0.2944, 0.2524, 0.2524, 0.2524, 0.0484, 0.2524, 0.2524, 0.4324, 0.2524, 0.2524, 0.2524, 0.0484, 0.2524, 0.4444, 0.2524, 0.27040000000000003, 0.27040000000000003, 0.2944, 0.2944, 0.2944, 0.2944, 0.0484, 0.0484, 0.2524, 0.2524, 0.4324, 0.0484, 0.2644, 0.4624, 0.2524, 0.4684, 0.2584, 0.2524, 0.6003999999999999, 0.2944, 0.2944, 0.2944, 0.2944, 0.2944, 0.2944, 0.2944, 0.5860000000000001, 0.2524, 0.2524, 0.4324 ], "belief_accuracy": [ 0.13332, 0.37332, 0.24, 0.0, 0.24, 0.0, 0.13332, 0.24, 0.31332000000000004, 0.18000000000000002, 0.25332, 0.25332, 0.37332, 0.18000000000000002, 0.24, 0.43332, 0.0, 0.13332, 0.0, 0.0, 0.25332, 0.49332, 0.37332, 0.37332, 0.25332, 0.24, 0.37332, 0.18000000000000002, 0.18000000000000002, 0.13332, 0.12, 0.0, 0.36, 0.18000000000000002, 0.37332, 0.24, 0.37332, 0.24, 0.24, 0.13332, 0.13332, 0.24, 0.0, 0.18000000000000002, 0.24, 0.43332, 0.24, 0.24, 0.18000000000000002, 0.37332, 0.18000000000000002, 0.0, 0.37332, 0.37332, 0.37332, 0.0, 0.13332, 0.13332, 0.13332, 0.24, 0.49332, 0.24, 0.24, 0.37332, 0.24, 0.0, 0.24, 0.24, 0.12, 0.25332, 0.25332, 0.3, 0.36, 0.31332000000000004, 0.0, 0.0, 0.18000000000000002, 0.25332, 0.18000000000000002, 0.36, 0.24, 0.24, 0.18000000000000002, 0.25332, 0.24, 0.24, 0.24, 0.0, 0.24, 0.18000000000000002, 0.43332, 0.24, 0.24, 0.37332, 0.13332, 0.24, 0.24, 0.49332, 0.24, 0.0, 0.0, 0.24, 0.24, 0.24, 0.37332, 0.0, 0.0, 0.37332, 0.36, 0.24, 0.0, 0.24, 0.0, 0.37332, 0.24, 0.37332, 0.18000000000000002, 0.18000000000000002, 0.24, 0.24, 0.37332, 0.25332, 0.37332, 0.31332000000000004, 0.24, 0.37332, 0.49332, 0.25332, 0.36, 0.24, 0.24, 0.0, 0.18000000000000002, 0.31332000000000004, 0.24, 0.37332, 0.31332000000000004, 0.37332, 0.13332, 0.31332000000000004, 0.18000000000000002, 0.0, 0.24, 0.24, 0.24, 0.24, 0.18000000000000002, 0.37332, 0.24, 0.37332, 0.36, 0.37332, 0.24, 0.0, 0.24, 0.49332, 0.49332, 0.24, 0.37332, 0.24, 0.24, 0.13332, 0.0, 0.0, 0.37332, 0.37332, 0.18000000000000002, 0.24, 0.24, 0.37332, 0.24, 0.24, 0.37332, 0.24, 0.0, 0.49332, 0.0, 0.18000000000000002, 0.24, 0.24, 0.31332000000000004, 0.24, 0.12, 0.24, 0.3, 0.18000000000000002, 0.0, 0.24, 0.24, 0.24, 0.24, 0.37332, 0.24, 0.24, 0.24, 0.37332, 0.31332000000000004, 0.24, 0.12, 0.24, 0.0, 0.24, 0.24, 0.36, 0.12, 0.24, 0.12, 0.24, 0.24, 0.36, 0.0, 0.0, 0.36, 0.24, 0.24, 0.24, 0.0, 0.24, 0.24, 0.0, 0.18000000000000002, 0.24, 0.18000000000000002, 0.18000000000000002, 0.24, 0.24, 0.0, 0.0, 0.18000000000000002, 0.24, 0.24, 0.24, 0.0, 0.24, 0.24, 0.24, 0.0, 0.0, 0.0, 0.24, 0.0, 0.24, 0.24, 0.0, 0.24, 0.0, 0.24, 0.24, 0.24, 0.24, 0.12, 0.24, 0.24, 0.0, 0.18000000000000002, 0.24, 0.24, 0.24, 0.24, 0.18000000000000002, 0.24, 0.18000000000000002, 0.24, 0.24, 0.36, 0.24, 0.0, 0.18000000000000002, 0.24, 0.24, 0.36, 0.18000000000000002, 0.24, 0.0, 0.18000000000000002, 0.24, 0.0, 0.0, 0.0, 0.24, 0.24, 0.0, 0.24, 0.24, 0.24, 0.24, 0.24, 0.0, 0.24, 0.0, 0.24, 0.0, 0.18000000000000002, 0.24, 0.24, 0.0, 0.0, 0.24, 0.0, 0.24, 0.18000000000000002, 0.36, 0.18000000000000002, 0.18000000000000002, 0.0, 0.0, 0.24, 0.18000000000000002, 0.18000000000000002, 0.36, 0.24, 0.18000000000000002, 0.24, 0.24, 0.12, 0.24, 0.24, 0.24, 0.24, 0.36, 0.24, 0.24, 0.36, 0.18000000000000002, 0.24, 0.0, 0.0, 0.24, 0.0, 0.0, 0.24, 0.12, 0.24, 0.24, 0.36, 0.24, 0.24, 0.18000000000000002, 0.0, 0.18000000000000002, 0.0, 0.0, 0.24, 0.24, 0.24, 0.24, 0.24, 0.36, 0.24, 0.24, 0.12, 0.36, 0.18000000000000002, 0.18000000000000002, 0.36, 0.12, 0.24, 0.24, 0.12, 0.0, 0.24, 0.24, 0.36, 0.12, 0.24, 0.24, 0.0, 0.18000000000000002, 0.36, 0.24, 0.24, 0.24, 0.24, 0.24, 0.24, 0.18000000000000002, 0.0, 0.0, 0.18000000000000002, 0.24, 0.0, 0.0, 0.24, 0.24, 0.0, 0.24, 0.24, 0.0, 0.36, 0.24, 0.24, 0.24, 0.18000000000000002, 0.24, 0.24, 0.24, 0.24, 0.0, 0.24, 0.18000000000000002 ], "combined_rewards": [ 0.289112, 0.29472200000000004, 0.24806, 0.03146, 0.25586, 0.17186, 0.218522, 0.25586, 0.512012, 0.25436000000000003, 0.491012, 0.491012, 0.533012, 0.25436000000000003, 0.27536, 0.752912, 0.28886, 0.21072200000000002, 0.30446, 0.16406, 0.491012, 0.773912, 0.533012, 0.533012, 0.26052200000000003, 0.25196, 0.318122, 0.23096000000000003, 0.22706, 0.21462200000000003, 0.20996000000000004, 0.01976, 0.5162599999999999, 0.22706, 0.29472200000000004, 0.24806, 0.302522, 0.25586, 0.25586, 0.218522, 0.327722, 0.25586, 0.16796000000000003, 0.23096000000000003, 0.25976, 0.444422, 0.25976, 0.25976, 0.22706, 0.306422, 0.22706, 0.28106000000000003, 0.302522, 0.302522, 0.29472200000000004, 0.16406, 0.066422, 0.066422, 0.21072200000000002, 0.24806, 0.453722, 0.24806, 0.24806, 0.29472200000000004, 0.24806, 0.03146, 0.24806, 0.24806, 0.23336, 0.491012, 0.491012, 0.49526, 0.40706000000000003, 0.27762200000000004, 0.37505, 0.16406, 0.23876000000000003, 0.252722, 0.23876000000000003, 0.40706000000000003, 0.25196, 0.24806, 0.34406000000000003, 0.256622, 0.25196, 0.25586, 0.25586, 0.17186, 0.25196, 0.23486, 0.43272200000000005, 0.25586, 0.24806, 0.29472200000000004, 0.289112, 0.24806, 0.24806, 0.457622, 0.24806, 0.30836, 0.28106000000000003, 0.24806, 0.25196, 0.5994499999999999, 0.29862200000000005, 0.18356, 0.16406, 0.509612, 0.29396, 0.25196, 0.03146, 0.25586, 0.03146, 0.306422, 0.25976, 0.306422, 0.25436000000000003, 0.25436000000000003, 0.27536, 0.27536, 0.533012, 0.491012, 0.533012, 0.512012, 0.25976, 0.306422, 0.46542199999999995, 0.26442200000000005, 0.62585, 0.24806, 0.26756, 0.03146, 0.23486, 0.273722, 0.24806, 0.29472200000000004, 0.28152200000000005, 0.29862200000000005, 0.21072200000000002, 0.27762200000000004, 0.37136, 0.16406, 0.46685, 0.46685, 0.25976, 0.25976, 0.23876000000000003, 0.306422, 0.46685, 0.302522, 0.40706000000000003, 0.29472200000000004, 0.36506000000000005, 0.16406, 0.25586, 0.551612, 0.461522, 0.26756, 0.29472200000000004, 0.24806, 0.24806, 0.355022, 0.16796000000000003, 0.16406, 0.7319120000000001, 0.533012, 0.25436000000000003, 0.27536, 0.25976, 0.306422, 0.25976, 0.25976, 0.302522, 0.24806, 0.16406, 0.461522, 0.03146, 0.22706, 0.24806, 0.24806, 0.273722, 0.24806, 0.20606000000000002, 0.24806, 0.38606, 0.44584999999999997, 0.30446, 0.39236000000000004, 0.37676, 0.25976, 0.25976, 0.306422, 0.25976, 0.25976, 0.25976, 0.306422, 0.49251199999999995, 0.25196, 0.21386, 0.36506000000000005, 0.21406000000000003, 0.30585999999999997, 0.30196, 0.46096, 0.28336, 0.32536, 0.28336, 0.32536, 0.30196, 0.46096, 0.42700000000000005, 0.35836, 0.45706, 0.30585999999999997, 0.29806, 0.30196, 0.23746, 0.6553000000000001, 0.29806, 0.21406000000000003, 0.30046, 0.5188000000000001, 0.4978000000000001, 0.49390000000000006, 0.29806, 0.30196, 0.21406000000000003, 0.21796000000000004, 0.30436, 0.32536, 0.32536, 0.32536, 0.4348000000000001, 0.30585999999999997, 0.5188000000000001, 0.5188000000000001, 0.21406000000000003, 0.33106, 0.21406000000000003, 0.43846, 0.22186, 0.5188000000000001, 0.5188000000000001, 0.22186, 0.30585999999999997, 0.22186, 0.29806, 0.30585999999999997, 0.30196, 0.30196, 0.47290000000000004, 0.30196, 0.30976, 0.08146, 0.28876, 0.30976, 0.30976, 0.30976, 0.30976, 0.28876, 0.29806, 0.4978000000000001, 0.30585999999999997, 0.30585999999999997, 0.45706, 0.30196, 0.21796000000000004, 0.28096000000000004, 0.32536, 0.32536, 0.56626, 0.30436, 0.30585999999999997, 0.35836, 0.41356, 0.30196, 0.4348000000000001, 0.4348000000000001, 0.22186, 0.5188000000000001, 0.5149000000000001, 0.22186, 0.5188000000000001, 0.5188000000000001, 0.30585999999999997, 0.29806, 0.30196, 0.33106, 0.5188000000000001, 0.22186, 0.30585999999999997, 0.22186, 0.30436, 0.32536, 0.32536, 0.44026, 0.08146, 0.29806, 0.06976, 0.29806, 0.30436, 0.56626, 0.30436, 0.30436, 0.08146, 0.08146, 0.29806, 0.28096000000000004, 0.27706000000000003, 0.46875999999999995, 0.29806, 0.27706000000000003, 0.30976, 0.30976, 0.26776, 0.30976, 0.30976, 0.30976, 0.30976, 0.46875999999999995, 0.30976, 0.30976, 0.46875999999999995, 0.28876, 0.30585999999999997, 0.22186, 0.22186, 0.30196, 0.21406000000000003, 0.21406000000000003, 0.30196, 0.25996, 0.29806, 0.29806, 0.45706, 0.29806, 0.29806, 0.39406, 0.08146, 0.27706000000000003, 0.21796000000000004, 0.22186, 0.30585999999999997, 0.30585999999999997, 0.30976, 0.30976, 0.30976, 0.46875999999999995, 0.32536, 0.32536, 0.28336, 0.56626, 0.30436, 0.30436, 0.56626, 0.28336, 0.29806, 0.29806, 0.25606, 0.08146, 0.29806, 0.29806, 0.45706, 0.25606, 0.29806, 0.29806, 0.08146, 0.27706000000000003, 0.46486, 0.29806, 0.30976, 0.30976, 0.32536, 0.32536, 0.32536, 0.30436, 0.08146, 0.08146, 0.27706000000000003, 0.29806, 0.33106, 0.08146, 0.30585999999999997, 0.43456, 0.21406000000000003, 0.43846, 0.30196, 0.21406000000000003, 0.56626, 0.32536, 0.32536, 0.32536, 0.30436, 0.32536, 0.32536, 0.32536, 0.5149000000000001, 0.21406000000000003, 0.29806, 0.39406 ], "config": { "pipeline": "SFT \u2192 GRPO 200 steps \u2192 DPO 50 steps", "base_model": "ProthamD/adaptive-world-grpo-qwen2.5-3b", "sft_examples": 420, "curriculum": { "easy": "0-60", "medium": "60-130", "hard": "130-200" }, "reward_weights": { "task": 0.7, "belief": 0.3 }, "dpo_pairs": 7 } }