3844 lines
134 KiB
JSON
3844 lines
134 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.22857142857142856,
|
|
"eval_steps": 500,
|
|
"global_step": 200,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2124.791679382324,
|
|
"dapo/avg_reward_std": 0.28261276125907897,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.42666667342185977,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 45.83333333333333,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.001142857142857143,
|
|
"grad_norm": 0.03718917816877365,
|
|
"kl": 0.0,
|
|
"learning_rate": 0.0,
|
|
"loss": -0.0465,
|
|
"reward": 0.6372265852987766,
|
|
"reward_std": 0.9629172012209892,
|
|
"step": 1
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2559.6631774902344,
|
|
"dapo/avg_reward_std": 0.2737089714833668,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.39285715403301374,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 32.291666666666664,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.002285714285714286,
|
|
"grad_norm": 0.031548872590065,
|
|
"kl": 0.0,
|
|
"learning_rate": 1e-07,
|
|
"loss": 0.0292,
|
|
"reward": 0.2883484517224133,
|
|
"reward_std": 0.9225177392363548,
|
|
"step": 2
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2259.0243072509766,
|
|
"dapo/avg_reward_std": 0.30627372419392623,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.40740741734151487,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 38.33333333333333,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.0034285714285714284,
|
|
"grad_norm": 0.028476394712924957,
|
|
"kl": 3.738701343536377e-05,
|
|
"learning_rate": 2e-07,
|
|
"loss": 0.0118,
|
|
"reward": 0.5692771524190903,
|
|
"reward_std": 0.9722258150577545,
|
|
"step": 3
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2388.763916015625,
|
|
"dapo/avg_reward_std": 0.2417103610932827,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34895834093913436,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 29.479166666666664,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.004571428571428572,
|
|
"grad_norm": 0.03074878267943859,
|
|
"kl": 3.4555792808532715e-05,
|
|
"learning_rate": 3e-07,
|
|
"loss": 0.0428,
|
|
"reward": 0.5176859218627214,
|
|
"reward_std": 0.9351213574409485,
|
|
"step": 4
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2228.9131927490234,
|
|
"dapo/avg_reward_std": 0.24784977205338016,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3494623731220922,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 34.375,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.005714285714285714,
|
|
"grad_norm": 0.03052515536546707,
|
|
"kl": 4.2438507080078125e-05,
|
|
"learning_rate": 4e-07,
|
|
"loss": 0.0573,
|
|
"reward": 0.5747799873352051,
|
|
"reward_std": 0.9150463417172432,
|
|
"step": 5
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2526.2743377685547,
|
|
"dapo/avg_reward_std": 0.31032066589052026,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4772727367552844,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 39.58333333333333,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.006857142857142857,
|
|
"grad_norm": 0.031065233051776886,
|
|
"kl": 6.331503391265869e-05,
|
|
"learning_rate": 5e-07,
|
|
"loss": 0.068,
|
|
"reward": 0.49577395524829626,
|
|
"reward_std": 0.9604900777339935,
|
|
"step": 6
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2096.857650756836,
|
|
"dapo/avg_reward_std": 0.30248596491637053,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.43827161303272955,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 33.33333333333333,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.008,
|
|
"grad_norm": 0.03395611792802811,
|
|
"kl": 3.603100776672363e-05,
|
|
"learning_rate": 6e-07,
|
|
"loss": 0.0104,
|
|
"reward": 0.6337036956101656,
|
|
"reward_std": 0.9339632987976074,
|
|
"step": 7
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2080.482681274414,
|
|
"dapo/avg_reward_std": 0.2619025791063905,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3489583395421505,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 27.82738095238095,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.009142857142857144,
|
|
"grad_norm": 0.030713744461536407,
|
|
"kl": 3.699958324432373e-05,
|
|
"learning_rate": 7e-07,
|
|
"loss": 0.0191,
|
|
"reward": 0.5047293808311224,
|
|
"reward_std": 0.9456561654806137,
|
|
"step": 8
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2575.715316772461,
|
|
"dapo/avg_reward_std": 0.26183396059533826,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4275362387947414,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 56.25,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.010285714285714285,
|
|
"grad_norm": 0.02862783893942833,
|
|
"kl": 3.787875175476074e-05,
|
|
"learning_rate": 8e-07,
|
|
"loss": 0.0251,
|
|
"reward": 0.49641977716237307,
|
|
"reward_std": 0.9346907436847687,
|
|
"step": 9
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2574.7951431274414,
|
|
"dapo/avg_reward_std": 0.2888991279261453,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.46031746694019865,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 61.875,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.011428571428571429,
|
|
"grad_norm": 0.03313002362847328,
|
|
"kl": 2.9653310775756836e-05,
|
|
"learning_rate": 9e-07,
|
|
"loss": 0.0131,
|
|
"reward": 0.6514056231826544,
|
|
"reward_std": 0.9486276879906654,
|
|
"step": 10
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2648.3541870117188,
|
|
"dapo/avg_reward_std": 0.1985154973136054,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.23333333631356556,
|
|
"dapo/num_sampling_attempts": 5.625,
|
|
"dapo/sampling_efficiency": 22.747252747252745,
|
|
"dapo/total_prompts_processed": 33.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.012571428571428572,
|
|
"grad_norm": 0.02842891961336136,
|
|
"kl": 4.6372413635253906e-05,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0228,
|
|
"reward": 0.3831507060676813,
|
|
"reward_std": 0.9138674512505531,
|
|
"step": 11
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2340.7708435058594,
|
|
"dapo/avg_reward_std": 0.21896107792854308,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.25000000558793545,
|
|
"dapo/num_sampling_attempts": 5.0,
|
|
"dapo/sampling_efficiency": 29.791666666666664,
|
|
"dapo/total_prompts_processed": 30.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.013714285714285714,
|
|
"grad_norm": 0.02896970883011818,
|
|
"kl": 3.764033317565918e-05,
|
|
"learning_rate": 9.997258721585931e-07,
|
|
"loss": 0.0141,
|
|
"reward": 0.3742078524082899,
|
|
"reward_std": 0.9111683145165443,
|
|
"step": 12
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2731.9687576293945,
|
|
"dapo/avg_reward_std": 0.2593883651274222,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.39506174016881873,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 43.95833333333333,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.014857142857142857,
|
|
"grad_norm": 0.028494343161582947,
|
|
"kl": 4.1812658309936523e-05,
|
|
"learning_rate": 9.989038226169207e-07,
|
|
"loss": 0.0482,
|
|
"reward": 0.37119605229236186,
|
|
"reward_std": 0.9484475553035736,
|
|
"step": 13
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2346.684066772461,
|
|
"dapo/avg_reward_std": 0.2633256334247011,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3787878860126842,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 40.416666666666664,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.03419339284300804,
|
|
"kl": 3.219395875930786e-05,
|
|
"learning_rate": 9.975348529157229e-07,
|
|
"loss": 0.0443,
|
|
"reward": 0.5307169873267412,
|
|
"reward_std": 0.8819384500384331,
|
|
"step": 14
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2438.8437881469727,
|
|
"dapo/avg_reward_std": 0.31698794450078693,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.48412699571677614,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 49.99999999999999,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.017142857142857144,
|
|
"grad_norm": 0.03230522945523262,
|
|
"kl": 3.4749507904052734e-05,
|
|
"learning_rate": 9.956206309337066e-07,
|
|
"loss": 0.0519,
|
|
"reward": 0.6968788839876652,
|
|
"reward_std": 0.9826493486762047,
|
|
"step": 15
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2835.3125076293945,
|
|
"dapo/avg_reward_std": 0.2820873036980629,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36111111876865226,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 49.375,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.018285714285714287,
|
|
"grad_norm": 0.026719439774751663,
|
|
"kl": 3.375113010406494e-05,
|
|
"learning_rate": 9.931634888554935e-07,
|
|
"loss": 0.0158,
|
|
"reward": 0.4585288055241108,
|
|
"reward_std": 0.9621468484401703,
|
|
"step": 16
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2489.513870239258,
|
|
"dapo/avg_reward_std": 0.24821309347947437,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35000000447034835,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 51.25,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.019428571428571427,
|
|
"grad_norm": 0.030841730535030365,
|
|
"kl": 3.2588839530944824e-05,
|
|
"learning_rate": 9.901664203302124e-07,
|
|
"loss": 0.0342,
|
|
"reward": 0.4615583084523678,
|
|
"reward_std": 0.8882262408733368,
|
|
"step": 17
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2291.8854217529297,
|
|
"dapo/avg_reward_std": 0.3492339625954628,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4000000149011612,
|
|
"dapo/num_sampling_attempts": 2.5,
|
|
"dapo/sampling_efficiency": 46.87499999999999,
|
|
"dapo/total_prompts_processed": 15.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.02057142857142857,
|
|
"grad_norm": 0.4981432557106018,
|
|
"kl": 4.331767559051514e-05,
|
|
"learning_rate": 9.866330768241983e-07,
|
|
"loss": 0.0782,
|
|
"reward": 0.5650830613449216,
|
|
"reward_std": 0.960162565112114,
|
|
"step": 18
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1727.9479217529297,
|
|
"dapo/avg_reward_std": 0.2201171379822951,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2863247940937678,
|
|
"dapo/num_sampling_attempts": 4.875,
|
|
"dapo/sampling_efficiency": 27.01388888888889,
|
|
"dapo/total_prompts_processed": 29.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.021714285714285714,
|
|
"grad_norm": 0.034473638981580734,
|
|
"kl": 2.7894973754882812e-05,
|
|
"learning_rate": 9.825677631722435e-07,
|
|
"loss": -0.0027,
|
|
"reward": 0.5283844769001007,
|
|
"reward_std": 0.9302913695573807,
|
|
"step": 19
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1848.9062576293945,
|
|
"dapo/avg_reward_std": 0.2080523163983316,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3030303070942561,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 40.74404761904762,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.022857142857142857,
|
|
"grad_norm": 0.03650596737861633,
|
|
"kl": 2.997368574142456e-05,
|
|
"learning_rate": 9.779754323328192e-07,
|
|
"loss": 0.0066,
|
|
"reward": 0.47246094793081284,
|
|
"reward_std": 0.925552561879158,
|
|
"step": 20
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2310.6354370117188,
|
|
"dapo/avg_reward_std": 0.18431008011102676,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26250000260770323,
|
|
"dapo/num_sampling_attempts": 5.0,
|
|
"dapo/sampling_efficiency": 32.53472222222222,
|
|
"dapo/total_prompts_processed": 30.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.024,
|
|
"grad_norm": 0.02872428111732006,
|
|
"kl": 3.707408905029297e-05,
|
|
"learning_rate": 9.728616793536587e-07,
|
|
"loss": 0.0041,
|
|
"reward": 0.5466808546334505,
|
|
"reward_std": 0.9614025354385376,
|
|
"step": 21
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2628.4618072509766,
|
|
"dapo/avg_reward_std": 0.27239492272629456,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3235294157091309,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 26.875,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.025142857142857144,
|
|
"grad_norm": 0.03156612813472748,
|
|
"kl": 4.024803638458252e-05,
|
|
"learning_rate": 9.672327345550543e-07,
|
|
"loss": 0.0396,
|
|
"reward": 0.4231120813637972,
|
|
"reward_std": 0.9312948659062386,
|
|
"step": 22
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2495.7673873901367,
|
|
"dapo/avg_reward_std": 0.30711027341229574,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3988095335662365,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 31.249999999999993,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.026285714285714287,
|
|
"grad_norm": 0.028224533423781395,
|
|
"kl": 3.413856029510498e-05,
|
|
"learning_rate": 9.610954559391704e-07,
|
|
"loss": 0.0195,
|
|
"reward": 0.5285261562094092,
|
|
"reward_std": 0.9373103529214859,
|
|
"step": 23
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1944.9201278686523,
|
|
"dapo/avg_reward_std": 0.29968351125717163,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4533333480358124,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 44.27083333333333,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.027428571428571427,
|
|
"grad_norm": 0.03633056953549385,
|
|
"kl": 3.1538307666778564e-05,
|
|
"learning_rate": 9.54457320834625e-07,
|
|
"loss": 0.0693,
|
|
"reward": 0.5397752095013857,
|
|
"reward_std": 0.9495814517140388,
|
|
"step": 24
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2616.593780517578,
|
|
"dapo/avg_reward_std": 0.16712580593127124,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.19811321232678755,
|
|
"dapo/num_sampling_attempts": 6.625,
|
|
"dapo/sampling_efficiency": 19.166666666666664,
|
|
"dapo/total_prompts_processed": 39.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.02857142857142857,
|
|
"grad_norm": 0.024344539269804955,
|
|
"kl": 3.676116466522217e-05,
|
|
"learning_rate": 9.473264167865171e-07,
|
|
"loss": 0.0139,
|
|
"reward": 0.3185653127729893,
|
|
"reward_std": 0.9151088818907738,
|
|
"step": 25
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2116.7257232666016,
|
|
"dapo/avg_reward_std": 0.27600910129218265,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33908046319566926,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 44.6875,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.029714285714285714,
|
|
"grad_norm": 0.031155193224549294,
|
|
"kl": 3.579258918762207e-05,
|
|
"learning_rate": 9.397114317029974e-07,
|
|
"loss": 0.0725,
|
|
"reward": 0.5197067707777023,
|
|
"reward_std": 0.8911866471171379,
|
|
"step": 26
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2148.781265258789,
|
|
"dapo/avg_reward_std": 0.24896243140101432,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31666667349636557,
|
|
"dapo/num_sampling_attempts": 5.0,
|
|
"dapo/sampling_efficiency": 22.63888888888889,
|
|
"dapo/total_prompts_processed": 30.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.030857142857142857,
|
|
"grad_norm": 0.03762076795101166,
|
|
"kl": 3.104656934738159e-05,
|
|
"learning_rate": 9.316216432703916e-07,
|
|
"loss": -0.0333,
|
|
"reward": 0.5081147998571396,
|
|
"reward_std": 0.9414060413837433,
|
|
"step": 27
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2357.4062881469727,
|
|
"dapo/avg_reward_std": 0.22747237629750194,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2990196110571132,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 34.49404761904761,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.02982812374830246,
|
|
"kl": 2.621859312057495e-05,
|
|
"learning_rate": 9.230669076497687e-07,
|
|
"loss": 0.0231,
|
|
"reward": 0.7687274925410748,
|
|
"reward_std": 0.9382865354418755,
|
|
"step": 28
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2772.941047668457,
|
|
"dapo/avg_reward_std": 0.2300749086972439,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.28282828854792047,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 48.482142857142854,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.03314285714285714,
|
|
"grad_norm": 0.030160676687955856,
|
|
"kl": 2.812594175338745e-05,
|
|
"learning_rate": 9.140576474687263e-07,
|
|
"loss": 0.0019,
|
|
"reward": 0.41888202354311943,
|
|
"reward_std": 0.9044449031352997,
|
|
"step": 29
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2038.208366394043,
|
|
"dapo/avg_reward_std": 0.1657373425437183,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.21544715943859843,
|
|
"dapo/num_sampling_attempts": 5.125,
|
|
"dapo/sampling_efficiency": 45.71969696969697,
|
|
"dapo/total_prompts_processed": 30.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.03428571428571429,
|
|
"grad_norm": 0.040263354778289795,
|
|
"kl": 3.8951635360717773e-05,
|
|
"learning_rate": 9.046048391230247e-07,
|
|
"loss": 0.0158,
|
|
"reward": 0.6328074131160975,
|
|
"reward_std": 0.913766622543335,
|
|
"step": 30
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2610.149299621582,
|
|
"dapo/avg_reward_std": 0.24689391613006592,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.39333333909511564,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 50.74404761904762,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.03542857142857143,
|
|
"grad_norm": 0.03027450665831566,
|
|
"kl": 3.1307339668273926e-05,
|
|
"learning_rate": 8.9471999940354e-07,
|
|
"loss": 0.0264,
|
|
"reward": 0.6263847425580025,
|
|
"reward_std": 0.9919310808181763,
|
|
"step": 31
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2505.697952270508,
|
|
"dapo/avg_reward_std": 0.26817766793312564,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34946237216072695,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 33.68055555555555,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.036571428571428574,
|
|
"grad_norm": 0.02961750328540802,
|
|
"kl": 2.7127563953399658e-05,
|
|
"learning_rate": 8.844151714648274e-07,
|
|
"loss": 0.0166,
|
|
"reward": 0.6057538501918316,
|
|
"reward_std": 0.9584499895572662,
|
|
"step": 32
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2879.420181274414,
|
|
"dapo/avg_reward_std": 0.24957223816050422,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2824074120985137,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 35.51136363636363,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.037714285714285714,
|
|
"grad_norm": 0.028292173519730568,
|
|
"kl": 2.950429916381836e-05,
|
|
"learning_rate": 8.737029101523929e-07,
|
|
"loss": 0.032,
|
|
"reward": 0.4974850555881858,
|
|
"reward_std": 0.9284666180610657,
|
|
"step": 33
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2605.826400756836,
|
|
"dapo/avg_reward_std": 0.27582160755991936,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.41666667101283866,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 42.70833333333333,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.038857142857142854,
|
|
"grad_norm": 0.028110038489103317,
|
|
"kl": 3.172457218170166e-05,
|
|
"learning_rate": 8.625962667065487e-07,
|
|
"loss": 0.0358,
|
|
"reward": 0.5906332535669208,
|
|
"reward_std": 0.8970795348286629,
|
|
"step": 34
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2197.09033203125,
|
|
"dapo/avg_reward_std": 0.2899627904097239,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3722222303350767,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 33.035714285714285,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.04,
|
|
"grad_norm": 0.03307325020432472,
|
|
"kl": 3.203749656677246e-05,
|
|
"learning_rate": 8.511087728614862e-07,
|
|
"loss": 0.024,
|
|
"reward": 0.6485824584960938,
|
|
"reward_std": 0.9721796959638596,
|
|
"step": 35
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2999.3507080078125,
|
|
"dapo/avg_reward_std": 0.20956570729613305,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26250000707805154,
|
|
"dapo/num_sampling_attempts": 5.0,
|
|
"dapo/sampling_efficiency": 22.51488095238095,
|
|
"dapo/total_prompts_processed": 30.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.04114285714285714,
|
|
"grad_norm": 0.028769005089998245,
|
|
"kl": 3.2588839530944824e-05,
|
|
"learning_rate": 8.392544243589427e-07,
|
|
"loss": 0.0619,
|
|
"reward": 0.48274967167526484,
|
|
"reward_std": 0.8917501345276833,
|
|
"step": 36
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2790.3020935058594,
|
|
"dapo/avg_reward_std": 0.30638546783190507,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.42307692995438206,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 35.20833333333333,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.04228571428571429,
|
|
"grad_norm": 0.026894288137555122,
|
|
"kl": 3.5509467124938965e-05,
|
|
"learning_rate": 8.270476638965461e-07,
|
|
"loss": 0.0283,
|
|
"reward": 0.5098943561315536,
|
|
"reward_std": 0.9712026715278625,
|
|
"step": 37
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2677.1493530273438,
|
|
"dapo/avg_reward_std": 0.18201035128699408,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2481481538878547,
|
|
"dapo/num_sampling_attempts": 5.625,
|
|
"dapo/sampling_efficiency": 25.416666666666664,
|
|
"dapo/total_prompts_processed": 33.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.04342857142857143,
|
|
"grad_norm": 0.027049226686358452,
|
|
"kl": 2.641230821609497e-05,
|
|
"learning_rate": 8.145033635316128e-07,
|
|
"loss": 0.0457,
|
|
"reward": 0.507211847230792,
|
|
"reward_std": 0.9677048400044441,
|
|
"step": 38
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3130.437530517578,
|
|
"dapo/avg_reward_std": 0.2055508976473528,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3137254956014016,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 26.160714285714278,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.044571428571428574,
|
|
"grad_norm": 0.027378324419260025,
|
|
"kl": 4.1447579860687256e-05,
|
|
"learning_rate": 8.01636806561836e-07,
|
|
"loss": 0.0522,
|
|
"reward": 0.5557294674217701,
|
|
"reward_std": 0.9394431114196777,
|
|
"step": 39
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2026.0486297607422,
|
|
"dapo/avg_reward_std": 0.20257248067193562,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2916666749450896,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 29.86111111111111,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.045714285714285714,
|
|
"grad_norm": 0.032405752688646317,
|
|
"kl": 1.9609928131103516e-05,
|
|
"learning_rate": 7.884636689049422e-07,
|
|
"loss": 0.0336,
|
|
"reward": 0.5694049745798111,
|
|
"reward_std": 0.9232507050037384,
|
|
"step": 40
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2640.326416015625,
|
|
"dapo/avg_reward_std": 0.21237638321789828,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34343435231483344,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 29.791666666666664,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.046857142857142854,
|
|
"grad_norm": 0.027951980009675026,
|
|
"kl": 2.6788562536239624e-05,
|
|
"learning_rate": 7.75e-07,
|
|
"loss": 0.0234,
|
|
"reward": 0.5206635389477015,
|
|
"reward_std": 0.9366661533713341,
|
|
"step": 41
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2681.18058013916,
|
|
"dapo/avg_reward_std": 0.24859387196343521,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3218390854268238,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 35.416666666666664,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.03045503795146942,
|
|
"kl": 3.679096698760986e-05,
|
|
"learning_rate": 7.612622032536507e-07,
|
|
"loss": 0.0237,
|
|
"reward": 0.4700614605098963,
|
|
"reward_std": 0.9389084428548813,
|
|
"step": 42
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2398.7118072509766,
|
|
"dapo/avg_reward_std": 0.2748411413161985,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.322580651890847,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 29.999999999999996,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.04914285714285714,
|
|
"grad_norm": 0.02945004403591156,
|
|
"kl": 2.7336180210113525e-05,
|
|
"learning_rate": 7.472670160550848e-07,
|
|
"loss": -0.0567,
|
|
"reward": 0.6530590765178204,
|
|
"reward_std": 0.929742157459259,
|
|
"step": 43
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1968.3437805175781,
|
|
"dapo/avg_reward_std": 0.20995861871374977,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2685185232096248,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 42.410714285714285,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.05028571428571429,
|
|
"grad_norm": 0.0354490801692009,
|
|
"kl": 1.671910285949707e-05,
|
|
"learning_rate": 7.330314893841101e-07,
|
|
"loss": 0.0869,
|
|
"reward": 0.6298563629388809,
|
|
"reward_std": 0.9230287447571754,
|
|
"step": 44
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2218.2743225097656,
|
|
"dapo/avg_reward_std": 0.260509067773819,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36666667262713115,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 33.229166666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.05142857142857143,
|
|
"grad_norm": 0.02954520471394062,
|
|
"kl": 2.514384686946869e-05,
|
|
"learning_rate": 7.185729670371604e-07,
|
|
"loss": 0.0031,
|
|
"reward": 0.6325996220111847,
|
|
"reward_std": 0.9546400979161263,
|
|
"step": 45
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2081.1458587646484,
|
|
"dapo/avg_reward_std": 0.2187695243666249,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2849462402443732,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 37.22222222222222,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.052571428571428575,
|
|
"grad_norm": 0.033979643136262894,
|
|
"kl": 2.872943878173828e-05,
|
|
"learning_rate": 7.039090644965509e-07,
|
|
"loss": -0.0104,
|
|
"reward": 0.5167231820523739,
|
|
"reward_std": 0.9025325626134872,
|
|
"step": 46
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2117.541702270508,
|
|
"dapo/avg_reward_std": 0.18839570879936218,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26811594580826553,
|
|
"dapo/num_sampling_attempts": 5.75,
|
|
"dapo/sampling_efficiency": 20.441919191919194,
|
|
"dapo/total_prompts_processed": 34.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.053714285714285714,
|
|
"grad_norm": 0.03177877888083458,
|
|
"kl": 3.078579902648926e-05,
|
|
"learning_rate": 6.890576474687263e-07,
|
|
"loss": 0.0077,
|
|
"reward": 0.3684711689129472,
|
|
"reward_std": 0.8811993673443794,
|
|
"step": 47
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2177.4444885253906,
|
|
"dapo/avg_reward_std": 0.19605370469995448,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2702702763112816,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 39.40972222222222,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.054857142857142854,
|
|
"grad_norm": 0.04067355766892433,
|
|
"kl": 2.4996697902679443e-05,
|
|
"learning_rate": 6.740368101176495e-07,
|
|
"loss": 0.0053,
|
|
"reward": 0.5635924749076366,
|
|
"reward_std": 0.9323460608720779,
|
|
"step": 48
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3022.513885498047,
|
|
"dapo/avg_reward_std": 0.22437315998655377,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30808081003752624,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 51.880411255411246,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.056,
|
|
"grad_norm": 0.028243908658623695,
|
|
"kl": 3.2588839530944824e-05,
|
|
"learning_rate": 6.588648530198504e-07,
|
|
"loss": 0.0463,
|
|
"reward": 0.5983518976718187,
|
|
"reward_std": 0.97667645663023,
|
|
"step": 49
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2369.423614501953,
|
|
"dapo/avg_reward_std": 0.25065614397709185,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36538461996958804,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 51.666666666666664,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.05714285714285714,
|
|
"grad_norm": 0.03361990302801132,
|
|
"kl": 2.4838373064994812e-05,
|
|
"learning_rate": 6.435602608679916e-07,
|
|
"loss": -0.0041,
|
|
"reward": 0.6849855165928602,
|
|
"reward_std": 0.9522178247570992,
|
|
"step": 50
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2274.833396911621,
|
|
"dapo/avg_reward_std": 0.22345838612980312,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2916666745311684,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 27.132936507936506,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.05828571428571429,
|
|
"grad_norm": 0.031927697360515594,
|
|
"kl": 1.7890706658363342e-05,
|
|
"learning_rate": 6.281416799501187e-07,
|
|
"loss": 0.0196,
|
|
"reward": 0.8541890066117048,
|
|
"reward_std": 0.9146186113357544,
|
|
"step": 51
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2918.0799102783203,
|
|
"dapo/avg_reward_std": 0.28684074508732765,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3333333386429425,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 41.36904761904762,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.05942857142857143,
|
|
"grad_norm": 0.026396779343485832,
|
|
"kl": 2.3087020963430405e-05,
|
|
"learning_rate": 6.126278954320294e-07,
|
|
"loss": 0.0343,
|
|
"reward": 0.44786757230758667,
|
|
"reward_std": 0.9706326127052307,
|
|
"step": 52
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2045.833339691162,
|
|
"dapo/avg_reward_std": 0.2355064716604021,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2870370431078805,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 31.354166666666664,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.060571428571428575,
|
|
"grad_norm": 0.04913632944226265,
|
|
"kl": 2.1755695343017578e-05,
|
|
"learning_rate": 5.97037808470444e-07,
|
|
"loss": 0.0387,
|
|
"reward": 0.6510349959135056,
|
|
"reward_std": 0.9507962614297867,
|
|
"step": 53
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1948.9444427490234,
|
|
"dapo/avg_reward_std": 0.243668794631958,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.366666671037674,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 56.5625,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.061714285714285715,
|
|
"grad_norm": 0.040572620928287506,
|
|
"kl": 2.1360814571380615e-05,
|
|
"learning_rate": 5.813904131848564e-07,
|
|
"loss": 0.0417,
|
|
"reward": 0.5514028863981366,
|
|
"reward_std": 0.9589040726423264,
|
|
"step": 54
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2484.541648864746,
|
|
"dapo/avg_reward_std": 0.30484401606596434,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.42307693224686843,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 42.18749999999999,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06285714285714286,
|
|
"grad_norm": 0.0297782514244318,
|
|
"kl": 2.2893771529197693e-05,
|
|
"learning_rate": 5.657047735161255e-07,
|
|
"loss": -0.0009,
|
|
"reward": 0.4546010522171855,
|
|
"reward_std": 0.9696914628148079,
|
|
"step": 55
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1533.7361297607422,
|
|
"dapo/avg_reward_std": 0.2159253837484302,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.29797980415098596,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 34.722222222222214,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.03312206640839577,
|
|
"kl": 7.178634405136108e-06,
|
|
"learning_rate": 5.5e-07,
|
|
"loss": 0.0108,
|
|
"reward": 0.7257717102766037,
|
|
"reward_std": 0.9033158496022224,
|
|
"step": 56
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2934.4409942626953,
|
|
"dapo/avg_reward_std": 0.2505974847337474,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36956522192644037,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 41.66666666666666,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06514285714285714,
|
|
"grad_norm": 0.02451159618794918,
|
|
"kl": 1.9356608390808105e-05,
|
|
"learning_rate": 5.342952264838747e-07,
|
|
"loss": 0.0483,
|
|
"reward": 0.5572653282433748,
|
|
"reward_std": 0.9176028743386269,
|
|
"step": 57
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1933.5243377685547,
|
|
"dapo/avg_reward_std": 0.20699472725391388,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3235294174622087,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 43.50198412698413,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06628571428571428,
|
|
"grad_norm": 0.04205997660756111,
|
|
"kl": 2.446398138999939e-05,
|
|
"learning_rate": 5.186095868151436e-07,
|
|
"loss": 0.035,
|
|
"reward": 0.5425214860588312,
|
|
"reward_std": 0.9688811302185059,
|
|
"step": 58
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2404.819435119629,
|
|
"dapo/avg_reward_std": 0.21416518474236512,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2649572701790394,
|
|
"dapo/num_sampling_attempts": 4.875,
|
|
"dapo/sampling_efficiency": 28.070436507936506,
|
|
"dapo/total_prompts_processed": 29.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06742857142857143,
|
|
"grad_norm": 0.032379262149333954,
|
|
"kl": 2.0030885934829712e-05,
|
|
"learning_rate": 5.02962191529556e-07,
|
|
"loss": -0.0022,
|
|
"reward": 0.5781768467277288,
|
|
"reward_std": 0.9525356665253639,
|
|
"step": 59
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2963.888931274414,
|
|
"dapo/avg_reward_std": 0.32426256509054274,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.42857143637679873,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 58.035714285714285,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06857142857142857,
|
|
"grad_norm": 0.027211569249629974,
|
|
"kl": 1.7156358808279037e-05,
|
|
"learning_rate": 4.873721045679706e-07,
|
|
"loss": 0.0068,
|
|
"reward": 0.44747511111199856,
|
|
"reward_std": 0.9607158154249191,
|
|
"step": 60
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2205.2465591430664,
|
|
"dapo/avg_reward_std": 0.203433408588171,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2500000063329935,
|
|
"dapo/num_sampling_attempts": 5.0,
|
|
"dapo/sampling_efficiency": 38.46153846153846,
|
|
"dapo/total_prompts_processed": 30.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.06971428571428571,
|
|
"grad_norm": 0.035166963934898376,
|
|
"kl": 1.146271824836731e-05,
|
|
"learning_rate": 4.7185832004988133e-07,
|
|
"loss": 0.0016,
|
|
"reward": 0.7233948148787022,
|
|
"reward_std": 0.9537224471569061,
|
|
"step": 61
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2170.302101135254,
|
|
"dapo/avg_reward_std": 0.3071755821054632,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.46212121776559134,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 52.5,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07085714285714285,
|
|
"grad_norm": 0.032445963472127914,
|
|
"kl": 1.7118407413363457e-05,
|
|
"learning_rate": 4.5643973913200837e-07,
|
|
"loss": 0.0133,
|
|
"reward": 0.5614959334488958,
|
|
"reward_std": 0.9226407110691071,
|
|
"step": 62
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2304.038215637207,
|
|
"dapo/avg_reward_std": 0.3201758420025861,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3827160596847534,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 33.33333333333333,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.072,
|
|
"grad_norm": 0.03544686362147331,
|
|
"kl": 1.1014439223799855e-05,
|
|
"learning_rate": 4.4113514698014953e-07,
|
|
"loss": 0.0809,
|
|
"reward": 0.6520206034183502,
|
|
"reward_std": 0.9506091177463531,
|
|
"step": 63
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1901.3506965637207,
|
|
"dapo/avg_reward_std": 0.2710137654233862,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33950617964620944,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 38.541666666666664,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07314285714285715,
|
|
"grad_norm": 0.044119708240032196,
|
|
"kl": 2.606213092803955e-05,
|
|
"learning_rate": 4.2596318988235037e-07,
|
|
"loss": 0.0059,
|
|
"reward": 0.6546321045607328,
|
|
"reward_std": 0.9510733336210251,
|
|
"step": 64
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2792.0382232666016,
|
|
"dapo/avg_reward_std": 0.2836403740303857,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36904762951391085,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 39.58333333333333,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07428571428571429,
|
|
"grad_norm": 0.04388947784900665,
|
|
"kl": 1.2818491086363792e-05,
|
|
"learning_rate": 4.1094235253127374e-07,
|
|
"loss": 0.0675,
|
|
"reward": 0.5376700833439827,
|
|
"reward_std": 0.9546815231442451,
|
|
"step": 65
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3018.1111450195312,
|
|
"dapo/avg_reward_std": 0.2566617141167323,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35000000993410746,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 29.583333333333325,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07542857142857143,
|
|
"grad_norm": 0.030510403215885162,
|
|
"kl": 2.337433397769928e-05,
|
|
"learning_rate": 3.9609093550344907e-07,
|
|
"loss": 0.067,
|
|
"reward": 0.45654861629009247,
|
|
"reward_std": 0.9348908290266991,
|
|
"step": 66
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2246.7361183166504,
|
|
"dapo/avg_reward_std": 0.17681238457963272,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2657657684506597,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 39.75198412698412,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07657142857142857,
|
|
"grad_norm": 0.039485227316617966,
|
|
"kl": 3.0115246772766113e-05,
|
|
"learning_rate": 3.8142703296283953e-07,
|
|
"loss": -0.0103,
|
|
"reward": 0.559457328170538,
|
|
"reward_std": 0.9844456240534782,
|
|
"step": 67
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1877.3090591430664,
|
|
"dapo/avg_reward_std": 0.21082516993795122,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2809523867709296,
|
|
"dapo/num_sampling_attempts": 4.375,
|
|
"dapo/sampling_efficiency": 40.13888888888889,
|
|
"dapo/total_prompts_processed": 26.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07771428571428571,
|
|
"grad_norm": 0.04208315163850784,
|
|
"kl": 1.7916783690452576e-05,
|
|
"learning_rate": 3.6696851061588994e-07,
|
|
"loss": 0.0055,
|
|
"reward": 0.71805115416646,
|
|
"reward_std": 0.9486410617828369,
|
|
"step": 68
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2743.187484741211,
|
|
"dapo/avg_reward_std": 0.3629622704842511,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.5882353020064971,
|
|
"dapo/num_sampling_attempts": 2.125,
|
|
"dapo/sampling_efficiency": 57.291666666666664,
|
|
"dapo/total_prompts_processed": 12.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.07885714285714286,
|
|
"grad_norm": 0.046305615454912186,
|
|
"kl": 1.8481165170669556e-05,
|
|
"learning_rate": 3.5273298394491515e-07,
|
|
"loss": 0.0753,
|
|
"reward": 0.5533816255629063,
|
|
"reward_std": 0.9835677221417427,
|
|
"step": 69
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1971.8750610351562,
|
|
"dapo/avg_reward_std": 0.290031298995018,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3958333432674408,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 50.11904761904761,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.03249451890587807,
|
|
"kl": 1.0361894965171814e-05,
|
|
"learning_rate": 3.387377967463493e-07,
|
|
"loss": 0.0123,
|
|
"reward": 0.7815902195870876,
|
|
"reward_std": 0.9491127580404282,
|
|
"step": 70
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2149.5729370117188,
|
|
"dapo/avg_reward_std": 0.30720199798715525,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.37931035356274967,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 31.666666666666664,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08114285714285714,
|
|
"grad_norm": 0.02995998226106167,
|
|
"kl": 2.8252601623535156e-05,
|
|
"learning_rate": 3.250000000000001e-07,
|
|
"loss": 0.0769,
|
|
"reward": 0.5328625496476889,
|
|
"reward_std": 0.9026356488466263,
|
|
"step": 71
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1963.1562538146973,
|
|
"dapo/avg_reward_std": 0.27671699684399825,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4551282163995963,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 46.041666666666664,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08228571428571428,
|
|
"grad_norm": 0.046918418258428574,
|
|
"kl": 3.359094262123108e-05,
|
|
"learning_rate": 3.115363310950578e-07,
|
|
"loss": 0.0368,
|
|
"reward": 0.32596728252246976,
|
|
"reward_std": 0.917833186686039,
|
|
"step": 72
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2666.1666717529297,
|
|
"dapo/avg_reward_std": 0.2536189202219248,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34895834140479565,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 37.84722222222222,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08342857142857144,
|
|
"grad_norm": 0.0253219623118639,
|
|
"kl": 3.542192280292511e-05,
|
|
"learning_rate": 2.9836319343816397e-07,
|
|
"loss": 0.0107,
|
|
"reward": 0.6293175183236599,
|
|
"reward_std": 0.935965321958065,
|
|
"step": 73
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2119.447982788086,
|
|
"dapo/avg_reward_std": 0.26048696994781495,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4200000029802322,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 47.291666666666664,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08457142857142858,
|
|
"grad_norm": 0.034480538219213486,
|
|
"kl": 1.7508864402770996e-05,
|
|
"learning_rate": 2.854966364683872e-07,
|
|
"loss": 0.0483,
|
|
"reward": 0.7494360618293285,
|
|
"reward_std": 0.9492424502968788,
|
|
"step": 74
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2078.9375,
|
|
"dapo/avg_reward_std": 0.2828026126932215,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3580246976128331,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 35.11904761904762,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08571428571428572,
|
|
"grad_norm": 0.03545458987355232,
|
|
"kl": 1.3923272490501404e-05,
|
|
"learning_rate": 2.729523361034538e-07,
|
|
"loss": 0.0531,
|
|
"reward": 0.5464182365685701,
|
|
"reward_std": 0.9530047550797462,
|
|
"step": 75
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2342.5416564941406,
|
|
"dapo/avg_reward_std": 0.21854268149896103,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3080808154561303,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 32.341269841269835,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08685714285714285,
|
|
"grad_norm": 0.02881987765431404,
|
|
"kl": 1.169554889202118e-05,
|
|
"learning_rate": 2.6074557564105724e-07,
|
|
"loss": 0.0077,
|
|
"reward": 0.5642017107456923,
|
|
"reward_std": 0.9335212334990501,
|
|
"step": 76
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3205.104217529297,
|
|
"dapo/avg_reward_std": 0.2153491945493789,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2777777835726738,
|
|
"dapo/num_sampling_attempts": 5.25,
|
|
"dapo/sampling_efficiency": 23.45238095238095,
|
|
"dapo/total_prompts_processed": 31.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.088,
|
|
"grad_norm": 0.024909108877182007,
|
|
"kl": 2.2567808628082275e-05,
|
|
"learning_rate": 2.488912271385139e-07,
|
|
"loss": 0.0436,
|
|
"reward": 0.4511043671518564,
|
|
"reward_std": 0.9582105726003647,
|
|
"step": 77
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1984.7881927490234,
|
|
"dapo/avg_reward_std": 0.2325562967194451,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3703703780968984,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 46.354166666666664,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.08914285714285715,
|
|
"grad_norm": 0.04120900481939316,
|
|
"kl": 2.2590160369873047e-05,
|
|
"learning_rate": 2.374037332934512e-07,
|
|
"loss": 0.0514,
|
|
"reward": 0.46765367314219475,
|
|
"reward_std": 0.9171552434563637,
|
|
"step": 78
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2322.930576324463,
|
|
"dapo/avg_reward_std": 0.24565138667821884,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35416666977107525,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 49.375,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09028571428571429,
|
|
"grad_norm": 0.03351881355047226,
|
|
"kl": 1.6979873180389404e-05,
|
|
"learning_rate": 2.2629708984760706e-07,
|
|
"loss": 0.0813,
|
|
"reward": 0.4460947550833225,
|
|
"reward_std": 0.9485716819763184,
|
|
"step": 79
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2418.187545776367,
|
|
"dapo/avg_reward_std": 0.23119631229024945,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2929292975953131,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 37.013888888888886,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09142857142857143,
|
|
"grad_norm": 0.03444164991378784,
|
|
"kl": 1.9297003746032715e-05,
|
|
"learning_rate": 2.1558482853517253e-07,
|
|
"loss": -0.0123,
|
|
"reward": 0.47735430393368006,
|
|
"reward_std": 0.9275016784667969,
|
|
"step": 80
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2673.1666870117188,
|
|
"dapo/avg_reward_std": 0.29530651973826544,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.39285714977553915,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 40.52083333333333,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09257142857142857,
|
|
"grad_norm": 0.02858138270676136,
|
|
"kl": 1.998385414481163e-05,
|
|
"learning_rate": 2.0528000059645995e-07,
|
|
"loss": 0.034,
|
|
"reward": 0.41152474470436573,
|
|
"reward_std": 0.9514285027980804,
|
|
"step": 81
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2257.954864501953,
|
|
"dapo/avg_reward_std": 0.23162428935368856,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3277777835726738,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 39.72222222222222,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09371428571428571,
|
|
"grad_norm": 0.034180980175733566,
|
|
"kl": 1.03069469332695e-05,
|
|
"learning_rate": 7.681643291108517e-07,
|
|
"loss": 0.0478,
|
|
"reward": 0.6525773257017136,
|
|
"reward_std": 0.9826234132051468,
|
|
"step": 82
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2630.8507080078125,
|
|
"dapo/avg_reward_std": 0.25974711243595394,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3511904797383717,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 49.166666666666664,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09485714285714286,
|
|
"grad_norm": 0.03644736111164093,
|
|
"kl": 1.800060272216797e-05,
|
|
"learning_rate": 7.612622032536507e-07,
|
|
"loss": 0.0921,
|
|
"reward": 0.4112757742404938,
|
|
"reward_std": 0.9365755990147591,
|
|
"step": 83
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2569.4896087646484,
|
|
"dapo/avg_reward_std": 0.20397330891518367,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.22619048080274037,
|
|
"dapo/num_sampling_attempts": 5.25,
|
|
"dapo/sampling_efficiency": 33.541666666666664,
|
|
"dapo/total_prompts_processed": 31.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.027630111202597618,
|
|
"kl": 9.745359420776367e-06,
|
|
"learning_rate": 7.54295724882796e-07,
|
|
"loss": 0.0357,
|
|
"reward": 0.41497555933892727,
|
|
"reward_std": 0.9506618455052376,
|
|
"step": 84
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2213.0660400390625,
|
|
"dapo/avg_reward_std": 0.2754218357224618,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33333334038334506,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 36.354166666666664,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09714285714285714,
|
|
"grad_norm": 0.035216327756643295,
|
|
"kl": 1.6536563634872437e-05,
|
|
"learning_rate": 7.472670160550848e-07,
|
|
"loss": 0.0527,
|
|
"reward": 0.632079154253006,
|
|
"reward_std": 0.9386599361896515,
|
|
"step": 85
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2339.1215209960938,
|
|
"dapo/avg_reward_std": 0.24339192857344946,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.291666673289405,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 35.3125,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09828571428571428,
|
|
"grad_norm": 0.03125083073973656,
|
|
"kl": 1.6085803508758545e-05,
|
|
"learning_rate": 7.401782177833147e-07,
|
|
"loss": -0.0221,
|
|
"reward": 0.4631906310096383,
|
|
"reward_std": 0.9198382347822189,
|
|
"step": 86
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1837.8993301391602,
|
|
"dapo/avg_reward_std": 0.22774873872598012,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3777777845660845,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 46.87499999999999,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.09942857142857142,
|
|
"grad_norm": 0.04138842225074768,
|
|
"kl": 1.7467886209487915e-05,
|
|
"learning_rate": 7.330314893841101e-07,
|
|
"loss": 0.0024,
|
|
"reward": 0.7271542213857174,
|
|
"reward_std": 0.905590832233429,
|
|
"step": 87
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2786.0416564941406,
|
|
"dapo/avg_reward_std": 0.2095056755202157,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2952381019081388,
|
|
"dapo/num_sampling_attempts": 4.375,
|
|
"dapo/sampling_efficiency": 35.65972222222222,
|
|
"dapo/total_prompts_processed": 26.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10057142857142858,
|
|
"grad_norm": 0.025848887860774994,
|
|
"kl": 7.427297532558441e-06,
|
|
"learning_rate": 7.258290078201731e-07,
|
|
"loss": 0.002,
|
|
"reward": 0.43730420619249344,
|
|
"reward_std": 0.9195110127329826,
|
|
"step": 88
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2346.68754196167,
|
|
"dapo/avg_reward_std": 0.19395678072440914,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2560975657003682,
|
|
"dapo/num_sampling_attempts": 5.125,
|
|
"dapo/sampling_efficiency": 35.01488095238095,
|
|
"dapo/total_prompts_processed": 30.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10171428571428572,
|
|
"grad_norm": 0.040970027446746826,
|
|
"kl": 1.3796612620353699e-05,
|
|
"learning_rate": 7.185729670371604e-07,
|
|
"loss": 0.0476,
|
|
"reward": 0.6351554682478309,
|
|
"reward_std": 0.8568265736103058,
|
|
"step": 89
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2486.21875,
|
|
"dapo/avg_reward_std": 0.2474305311153675,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3735632284961898,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 37.61904761904762,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10285714285714286,
|
|
"grad_norm": 0.030587567016482353,
|
|
"kl": 1.4983117580413818e-05,
|
|
"learning_rate": 7.11265577295385e-07,
|
|
"loss": 0.0254,
|
|
"reward": 0.6515812119469047,
|
|
"reward_std": 0.9235646799206734,
|
|
"step": 90
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2515.017402648926,
|
|
"dapo/avg_reward_std": 0.25874078144197876,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3913043562484824,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 51.56249999999999,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.104,
|
|
"grad_norm": 0.031289275735616684,
|
|
"kl": 6.1551108956336975e-06,
|
|
"learning_rate": 7.039090644965509e-07,
|
|
"loss": 0.0328,
|
|
"reward": 0.6403396036475897,
|
|
"reward_std": 0.9428967460989952,
|
|
"step": 91
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2979.027801513672,
|
|
"dapo/avg_reward_std": 0.2504267347486396,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2543859713171658,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 35.63041125541125,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10514285714285715,
|
|
"grad_norm": 0.029049718752503395,
|
|
"kl": -1.2740492820739746e-06,
|
|
"learning_rate": 6.965056695057204e-07,
|
|
"loss": 0.0314,
|
|
"reward": 0.535519327968359,
|
|
"reward_std": 0.8926167041063309,
|
|
"step": 92
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2552.562515258789,
|
|
"dapo/avg_reward_std": 0.2413217886801689,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33333334038334506,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 34.791666666666664,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10628571428571429,
|
|
"grad_norm": 0.03139115869998932,
|
|
"kl": 1.3202428817749023e-05,
|
|
"learning_rate": 6.890576474687263e-07,
|
|
"loss": 0.067,
|
|
"reward": 0.6561751328408718,
|
|
"reward_std": 0.9787176623940468,
|
|
"step": 93
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2403.184051513672,
|
|
"dapo/avg_reward_std": 0.29813223962600416,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.40384616129673445,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 40.416666666666664,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10742857142857143,
|
|
"grad_norm": 0.032709378749132156,
|
|
"kl": 2.093333750963211e-05,
|
|
"learning_rate": 6.815672671252315e-07,
|
|
"loss": 0.0328,
|
|
"reward": 0.556912356056273,
|
|
"reward_std": 0.9464646279811859,
|
|
"step": 94
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2963.795181274414,
|
|
"dapo/avg_reward_std": 0.2564438986472594,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26068376577817476,
|
|
"dapo/num_sampling_attempts": 4.875,
|
|
"dapo/sampling_efficiency": 24.07738095238095,
|
|
"dapo/total_prompts_processed": 29.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10857142857142857,
|
|
"grad_norm": 0.023549171164631844,
|
|
"kl": 9.554903954267502e-06,
|
|
"learning_rate": 6.740368101176495e-07,
|
|
"loss": 0.0142,
|
|
"reward": 0.3492610058747232,
|
|
"reward_std": 0.8781530037522316,
|
|
"step": 95
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2655.21875,
|
|
"dapo/avg_reward_std": 0.31138683449138294,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.46969697827642615,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 43.74999999999999,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.10971428571428571,
|
|
"grad_norm": 0.03213554993271828,
|
|
"kl": 1.945020630955696e-05,
|
|
"learning_rate": 6.664685702961344e-07,
|
|
"loss": 0.0357,
|
|
"reward": 0.4872458651661873,
|
|
"reward_std": 0.9538498669862747,
|
|
"step": 96
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2325.888900756836,
|
|
"dapo/avg_reward_std": 0.18781672976911068,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2968750069849193,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 38.263888888888886,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11085714285714286,
|
|
"grad_norm": 0.03308973088860512,
|
|
"kl": 1.2524658814072609e-05,
|
|
"learning_rate": 6.588648530198504e-07,
|
|
"loss": 0.0332,
|
|
"reward": 0.5582090672105551,
|
|
"reward_std": 0.9704806208610535,
|
|
"step": 97
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2980.78125,
|
|
"dapo/avg_reward_std": 0.22120360245830134,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.29824561900214147,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 34.717261904761905,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.02593560516834259,
|
|
"kl": 9.87970270216465e-06,
|
|
"learning_rate": 6.512279744547392e-07,
|
|
"loss": 0.0537,
|
|
"reward": 0.5110117536969483,
|
|
"reward_std": 0.9140844419598579,
|
|
"step": 98
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2679.701400756836,
|
|
"dapo/avg_reward_std": 0.22513854503631592,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.388888892200258,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 40.104166666666664,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11314285714285714,
|
|
"grad_norm": 0.028198201209306717,
|
|
"kl": -2.773245796561241e-06,
|
|
"learning_rate": 6.435602608679916e-07,
|
|
"loss": 0.0223,
|
|
"reward": 0.5703150723129511,
|
|
"reward_std": 0.9169064536690712,
|
|
"step": 99
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2113.7396087646484,
|
|
"dapo/avg_reward_std": 0.2158526074555185,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2916666724615627,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 30.823863636363633,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11428571428571428,
|
|
"grad_norm": 0.032321903854608536,
|
|
"kl": 2.765655517578125e-05,
|
|
"learning_rate": 6.358640479194451e-07,
|
|
"loss": 0.037,
|
|
"reward": 0.552736995741725,
|
|
"reward_std": 0.929665133357048,
|
|
"step": 100
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2397.545135498047,
|
|
"dapo/avg_reward_std": 0.2640196681022644,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.41304348603538843,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 43.75,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11542857142857142,
|
|
"grad_norm": 0.030507881194353104,
|
|
"kl": 1.4653429388999939e-05,
|
|
"learning_rate": 6.281416799501187e-07,
|
|
"loss": 0.0216,
|
|
"reward": 0.7607237044721842,
|
|
"reward_std": 0.9413916915655136,
|
|
"step": 101
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2775.312515258789,
|
|
"dapo/avg_reward_std": 0.26319959415839267,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3910256509597485,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 38.95833333333333,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11657142857142858,
|
|
"grad_norm": 0.028825754299759865,
|
|
"kl": 1.7821788787841797e-05,
|
|
"learning_rate": 6.203955092681039e-07,
|
|
"loss": -0.0059,
|
|
"reward": 0.4367541056126356,
|
|
"reward_std": 0.9408165961503983,
|
|
"step": 102
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2606.3194580078125,
|
|
"dapo/avg_reward_std": 0.22601407093386497,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.295698931620967,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 30.624999999999993,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11771428571428572,
|
|
"grad_norm": 0.029979709535837173,
|
|
"kl": 2.3851171135902405e-06,
|
|
"learning_rate": 6.126278954320294e-07,
|
|
"loss": 0.0463,
|
|
"reward": 0.6886496935039759,
|
|
"reward_std": 0.9053627252578735,
|
|
"step": 103
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2084.829849243164,
|
|
"dapo/avg_reward_std": 0.22010741523794225,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2702702747003452,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 32.51488095238095,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.11885714285714286,
|
|
"grad_norm": 0.04769710823893547,
|
|
"kl": 2.0613893866539e-05,
|
|
"learning_rate": 6.048412045323164e-07,
|
|
"loss": 0.1162,
|
|
"reward": 0.684872523881495,
|
|
"reward_std": 0.9595381543040276,
|
|
"step": 104
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1955.1354484558105,
|
|
"dapo/avg_reward_std": 0.2937169720729192,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.42361111504336196,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 49.166666666666664,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12,
|
|
"grad_norm": 0.04352044314146042,
|
|
"kl": 2.0936131477355957e-05,
|
|
"learning_rate": 5.97037808470444e-07,
|
|
"loss": -0.0017,
|
|
"reward": 0.6524754576385021,
|
|
"reward_std": 0.9669848829507828,
|
|
"step": 105
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2316.0486221313477,
|
|
"dapo/avg_reward_std": 0.2529407059773803,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3020833423361182,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 30.729166666666664,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12114285714285715,
|
|
"grad_norm": 0.03129468858242035,
|
|
"kl": 1.8656253814697266e-05,
|
|
"learning_rate": 5.892200842364462e-07,
|
|
"loss": -0.0284,
|
|
"reward": 0.6108895651996136,
|
|
"reward_std": 0.9319325312972069,
|
|
"step": 106
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2094.6909942626953,
|
|
"dapo/avg_reward_std": 0.2037892586655087,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2629629688130485,
|
|
"dapo/num_sampling_attempts": 5.625,
|
|
"dapo/sampling_efficiency": 21.066919191919194,
|
|
"dapo/total_prompts_processed": 33.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12228571428571429,
|
|
"grad_norm": 0.038948290050029755,
|
|
"kl": 2.824072726070881e-05,
|
|
"learning_rate": 5.813904131848564e-07,
|
|
"loss": 0.0748,
|
|
"reward": 0.48047966323792934,
|
|
"reward_std": 0.9251860752701759,
|
|
"step": 107
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2482.6146240234375,
|
|
"dapo/avg_reward_std": 0.19606016278266908,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.22592593100335862,
|
|
"dapo/num_sampling_attempts": 5.625,
|
|
"dapo/sampling_efficiency": 21.577380952380953,
|
|
"dapo/total_prompts_processed": 33.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12342857142857143,
|
|
"grad_norm": 0.027610260993242264,
|
|
"kl": 1.3685785233974457e-05,
|
|
"learning_rate": 5.735511803093248e-07,
|
|
"loss": 0.0016,
|
|
"reward": 0.46788009256124496,
|
|
"reward_std": 0.9522990807890892,
|
|
"step": 108
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3010.541717529297,
|
|
"dapo/avg_reward_std": 0.23601235449314117,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.38461538977347887,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 61.5530303030303,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12457142857142857,
|
|
"grad_norm": 0.031469572335481644,
|
|
"kl": 2.0675361156463623e-05,
|
|
"learning_rate": 5.657047735161255e-07,
|
|
"loss": 0.0491,
|
|
"reward": 0.6003496535122395,
|
|
"reward_std": 0.9582010880112648,
|
|
"step": 109
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2550.388931274414,
|
|
"dapo/avg_reward_std": 0.24275302588939668,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3222222273548444,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 41.666666666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12571428571428572,
|
|
"grad_norm": 0.03043791465461254,
|
|
"kl": 1.619383692741394e-05,
|
|
"learning_rate": 5.578535828967777e-07,
|
|
"loss": 0.0395,
|
|
"reward": 0.6210233392193913,
|
|
"reward_std": 0.9545274153351784,
|
|
"step": 110
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2248.6771240234375,
|
|
"dapo/avg_reward_std": 0.2556017003953457,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.32291667349636555,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 40.451388888888886,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12685714285714286,
|
|
"grad_norm": 0.029558613896369934,
|
|
"kl": 1.7130747437477112e-05,
|
|
"learning_rate": 5.5e-07,
|
|
"loss": 0.0156,
|
|
"reward": 0.8898655958473682,
|
|
"reward_std": 0.8961458280682564,
|
|
"step": 111
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2790.4132537841797,
|
|
"dapo/avg_reward_std": 0.2798377914088113,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35714286299688475,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 32.291666666666664,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.02665926143527031,
|
|
"kl": 2.7702553779818118e-05,
|
|
"learning_rate": 5.421464171032224e-07,
|
|
"loss": 0.0375,
|
|
"reward": 0.4765107296407223,
|
|
"reward_std": 0.9586756750941277,
|
|
"step": 112
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2058.163261413574,
|
|
"dapo/avg_reward_std": 0.21719616024117722,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2850877270102501,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 36.13636363636364,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.12914285714285714,
|
|
"grad_norm": 0.03724399581551552,
|
|
"kl": 9.129568934440613e-05,
|
|
"learning_rate": 5.342952264838747e-07,
|
|
"loss": 0.0308,
|
|
"reward": 0.5965504869818687,
|
|
"reward_std": 0.9517285376787186,
|
|
"step": 113
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1804.7569427490234,
|
|
"dapo/avg_reward_std": 0.22654692203767837,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30645161819073463,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 46.800595238095234,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13028571428571428,
|
|
"grad_norm": 0.0444670133292675,
|
|
"kl": 3.589317202568054e-05,
|
|
"learning_rate": 5.264488196906752e-07,
|
|
"loss": 0.0217,
|
|
"reward": 0.4887783471494913,
|
|
"reward_std": 0.9572358801960945,
|
|
"step": 114
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2705.472236633301,
|
|
"dapo/avg_reward_std": 0.24942583271435328,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4285714335384823,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 51.45833333333333,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13142857142857142,
|
|
"grad_norm": 0.027661452069878578,
|
|
"kl": 1.307763159275055e-05,
|
|
"learning_rate": 5.186095868151436e-07,
|
|
"loss": -0.022,
|
|
"reward": 0.5754544343799353,
|
|
"reward_std": 0.9811793565750122,
|
|
"step": 115
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1660.2222213745117,
|
|
"dapo/avg_reward_std": 0.20845345951415398,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30630631100487066,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 32.013888888888886,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13257142857142856,
|
|
"grad_norm": 0.03922427445650101,
|
|
"kl": 7.28946179151535e-06,
|
|
"learning_rate": 5.107799157635538e-07,
|
|
"loss": 0.0279,
|
|
"reward": 0.8034113459289074,
|
|
"reward_std": 0.9163173362612724,
|
|
"step": 116
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2143.3368377685547,
|
|
"dapo/avg_reward_std": 0.25861393963849105,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3456790193363472,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 38.95833333333333,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1337142857142857,
|
|
"grad_norm": 0.0386907123029232,
|
|
"kl": 2.8124195523560047e-05,
|
|
"learning_rate": 5.02962191529556e-07,
|
|
"loss": 0.0157,
|
|
"reward": 0.5698221866041422,
|
|
"reward_std": 0.9738077968358994,
|
|
"step": 117
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2709.371551513672,
|
|
"dapo/avg_reward_std": 0.17381487890731456,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26356589343658715,
|
|
"dapo/num_sampling_attempts": 5.375,
|
|
"dapo/sampling_efficiency": 31.522817460317455,
|
|
"dapo/total_prompts_processed": 32.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13485714285714287,
|
|
"grad_norm": 0.03524978086352348,
|
|
"kl": 2.0368024706840515e-05,
|
|
"learning_rate": 4.951587954676837e-07,
|
|
"loss": 0.073,
|
|
"reward": 0.5433152373880148,
|
|
"reward_std": 0.9576972275972366,
|
|
"step": 118
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2729.6458129882812,
|
|
"dapo/avg_reward_std": 0.2853468172252178,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31770834140479565,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 38.13988095238095,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.136,
|
|
"grad_norm": 0.035877469927072525,
|
|
"kl": 9.79006290435791e-06,
|
|
"learning_rate": 4.873721045679706e-07,
|
|
"loss": 0.0223,
|
|
"reward": 0.4996686838567257,
|
|
"reward_std": 0.9503490626811981,
|
|
"step": 119
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2456.458351135254,
|
|
"dapo/avg_reward_std": 0.3290893492244539,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.5000000127724239,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 40.62499999999999,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13714285714285715,
|
|
"grad_norm": 0.03583266958594322,
|
|
"kl": 9.331852197647095e-06,
|
|
"learning_rate": 4.79604490731896e-07,
|
|
"loss": 0.0363,
|
|
"reward": 0.8003920987248421,
|
|
"reward_std": 0.955727644264698,
|
|
"step": 120
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2489.1875,
|
|
"dapo/avg_reward_std": 0.1615937834694272,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.22222222600664412,
|
|
"dapo/num_sampling_attempts": 5.25,
|
|
"dapo/sampling_efficiency": 37.41987179487179,
|
|
"dapo/total_prompts_processed": 31.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1382857142857143,
|
|
"grad_norm": 0.027044769376516342,
|
|
"kl": 2.0619481801986694e-05,
|
|
"learning_rate": 4.7185832004988133e-07,
|
|
"loss": 0.0123,
|
|
"reward": 0.5692465994507074,
|
|
"reward_std": 0.9356264397501945,
|
|
"step": 121
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2946.687530517578,
|
|
"dapo/avg_reward_std": 0.26767816713878084,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3452381023338863,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 33.75,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.13942857142857143,
|
|
"grad_norm": 0.03187067061662674,
|
|
"kl": 2.1383166313171387e-05,
|
|
"learning_rate": 4.641359520805548e-07,
|
|
"loss": 0.0722,
|
|
"reward": 0.42231168132275343,
|
|
"reward_std": 0.9001481607556343,
|
|
"step": 122
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1841.1458206176758,
|
|
"dapo/avg_reward_std": 0.32384763956069945,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4000000065565109,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 41.041666666666664,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14057142857142857,
|
|
"grad_norm": 0.03784916177392006,
|
|
"kl": 4.2632222175598145e-05,
|
|
"learning_rate": 4.5643973913200837e-07,
|
|
"loss": 0.0367,
|
|
"reward": 0.6476083844900131,
|
|
"reward_std": 0.908843033015728,
|
|
"step": 123
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2392.166702270508,
|
|
"dapo/avg_reward_std": 0.26674444922085466,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3218390869683233,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 31.666666666666664,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1417142857142857,
|
|
"grad_norm": 0.02941369265317917,
|
|
"kl": 2.299714833498001e-05,
|
|
"learning_rate": 4.4877202554526084e-07,
|
|
"loss": 0.0152,
|
|
"reward": 0.5824479665607214,
|
|
"reward_std": 0.9478363320231438,
|
|
"step": 124
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3125.159713745117,
|
|
"dapo/avg_reward_std": 0.29309388995170593,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.5000000049670538,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 51.45833333333333,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14285714285714285,
|
|
"grad_norm": 0.030095171183347702,
|
|
"kl": 3.2413750886917114e-05,
|
|
"learning_rate": 4.4113514698014953e-07,
|
|
"loss": 0.0534,
|
|
"reward": 0.5003506469074637,
|
|
"reward_std": 0.8919698372483253,
|
|
"step": 125
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2462.8368377685547,
|
|
"dapo/avg_reward_std": 0.2680182981491089,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3466666728258133,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 46.87499999999999,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.04286734014749527,
|
|
"kl": 5.683675408363342e-05,
|
|
"learning_rate": 4.3353142970386557e-07,
|
|
"loss": 0.0028,
|
|
"reward": 0.5951744802296162,
|
|
"reward_std": 0.9584252312779427,
|
|
"step": 126
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2443.4618225097656,
|
|
"dapo/avg_reward_std": 0.19895405417833573,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2820512862541737,
|
|
"dapo/num_sampling_attempts": 4.875,
|
|
"dapo/sampling_efficiency": 33.90376984126984,
|
|
"dapo/total_prompts_processed": 29.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14514285714285713,
|
|
"grad_norm": 0.03486345708370209,
|
|
"kl": 2.958625555038452e-05,
|
|
"learning_rate": 4.2596318988235037e-07,
|
|
"loss": -0.0055,
|
|
"reward": 0.7111770529299974,
|
|
"reward_std": 0.9570346251130104,
|
|
"step": 127
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2227.385452270508,
|
|
"dapo/avg_reward_std": 0.22934340153421676,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33333333688122885,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 52.291666666666664,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1462857142857143,
|
|
"grad_norm": 0.04721139743924141,
|
|
"kl": 3.547314554452896e-05,
|
|
"learning_rate": 4.1843273287476854e-07,
|
|
"loss": 0.1085,
|
|
"reward": 0.4447980001568794,
|
|
"reward_std": 0.951726958155632,
|
|
"step": 128
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2883.357681274414,
|
|
"dapo/avg_reward_std": 0.4109063148498535,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.6777777880430221,
|
|
"dapo/num_sampling_attempts": 1.875,
|
|
"dapo/sampling_efficiency": 65.625,
|
|
"dapo/total_prompts_processed": 11.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14742857142857144,
|
|
"grad_norm": 0.02544778771698475,
|
|
"kl": 9.082257747650146e-06,
|
|
"learning_rate": 4.1094235253127374e-07,
|
|
"loss": 0.046,
|
|
"reward": 0.6885830331593752,
|
|
"reward_std": 0.9739237055182457,
|
|
"step": 129
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2122.795181274414,
|
|
"dapo/avg_reward_std": 0.2591241377371329,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3641975356472863,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 39.70238095238095,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14857142857142858,
|
|
"grad_norm": 0.03150525689125061,
|
|
"kl": 3.223586827516556e-05,
|
|
"learning_rate": 4.034943304942796e-07,
|
|
"loss": 0.0306,
|
|
"reward": 0.5525269485078752,
|
|
"reward_std": 0.9417792037129402,
|
|
"step": 130
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2306.8611450195312,
|
|
"dapo/avg_reward_std": 0.3414611066209859,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3908046078065346,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 32.410714285714285,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.14971428571428572,
|
|
"grad_norm": 0.036385975778102875,
|
|
"kl": 4.038959741592407e-05,
|
|
"learning_rate": 3.9609093550344907e-07,
|
|
"loss": 0.0679,
|
|
"reward": 0.5595943983644247,
|
|
"reward_std": 0.9294908344745636,
|
|
"step": 131
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2100.4444694519043,
|
|
"dapo/avg_reward_std": 0.22894747753938038,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34444445222616193,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 38.541666666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15085714285714286,
|
|
"grad_norm": 0.05820675194263458,
|
|
"kl": 7.29486346244812e-05,
|
|
"learning_rate": 3.8873442270461485e-07,
|
|
"loss": 0.0548,
|
|
"reward": 0.5259249797090888,
|
|
"reward_std": 0.9095494002103806,
|
|
"step": 132
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2399.0555725097656,
|
|
"dapo/avg_reward_std": 0.2968884447346563,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4057971057684525,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 48.33333333333333,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.152,
|
|
"grad_norm": 0.03143748641014099,
|
|
"kl": 1.6003847122192383e-05,
|
|
"learning_rate": 3.8142703296283953e-07,
|
|
"loss": 0.0154,
|
|
"reward": 0.6293735019862652,
|
|
"reward_std": 0.9267243668437004,
|
|
"step": 133
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2028.9653091430664,
|
|
"dapo/avg_reward_std": 0.24916886538267136,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4097222276031971,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 45.83333333333333,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15314285714285714,
|
|
"grad_norm": 0.03667714074254036,
|
|
"kl": 2.6845373213291168e-05,
|
|
"learning_rate": 3.7417099217982686e-07,
|
|
"loss": 0.0108,
|
|
"reward": 0.6901863785460591,
|
|
"reward_std": 0.9471788480877876,
|
|
"step": 134
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2116.6493225097656,
|
|
"dapo/avg_reward_std": 0.3074521411742483,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.37500000638621195,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 33.035714285714285,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15428571428571428,
|
|
"grad_norm": 0.04016295075416565,
|
|
"kl": 4.020519554615021e-05,
|
|
"learning_rate": 3.6696851061588994e-07,
|
|
"loss": 0.081,
|
|
"reward": 0.6064621905097738,
|
|
"reward_std": 0.9165264815092087,
|
|
"step": 135
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2051.2812728881836,
|
|
"dapo/avg_reward_std": 0.20643932349754102,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2979798059571873,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 49.26136363636363,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15542857142857142,
|
|
"grad_norm": 0.03907117620110512,
|
|
"kl": 4.081428050994873e-05,
|
|
"learning_rate": 3.5982178221668533e-07,
|
|
"loss": 0.0631,
|
|
"reward": 0.6007686145603657,
|
|
"reward_std": 0.946811780333519,
|
|
"step": 136
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2981.6145935058594,
|
|
"dapo/avg_reward_std": 0.17673770231860025,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26190476829097387,
|
|
"dapo/num_sampling_attempts": 5.25,
|
|
"dapo/sampling_efficiency": 33.19444444444444,
|
|
"dapo/total_prompts_processed": 31.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15657142857142858,
|
|
"grad_norm": 0.026764124631881714,
|
|
"kl": 2.1813437342643738e-05,
|
|
"learning_rate": 3.5273298394491515e-07,
|
|
"loss": 0.0296,
|
|
"reward": 0.5422612819820642,
|
|
"reward_std": 0.9660339280962944,
|
|
"step": 137
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1996.4930725097656,
|
|
"dapo/avg_reward_std": 0.2211539367834727,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35000000447034835,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 41.666666666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15771428571428572,
|
|
"grad_norm": 0.036459192633628845,
|
|
"kl": 6.0535967350006104e-05,
|
|
"learning_rate": 3.45704275117204e-07,
|
|
"loss": 0.0473,
|
|
"reward": 0.6352426074445248,
|
|
"reward_std": 1.0075769945979118,
|
|
"step": 138
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2673.013931274414,
|
|
"dapo/avg_reward_std": 0.21187836019431844,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.28431372738936367,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 40.347222222222214,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.15885714285714286,
|
|
"grad_norm": 0.027443382889032364,
|
|
"kl": 4.770606756210327e-05,
|
|
"learning_rate": 3.387377967463493e-07,
|
|
"loss": 0.0398,
|
|
"reward": 0.53852697648108,
|
|
"reward_std": 0.9717471078038216,
|
|
"step": 139
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2352.944465637207,
|
|
"dapo/avg_reward_std": 0.28073156496574136,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33908046936166697,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 31.666666666666664,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.03219648823142052,
|
|
"kl": 1.9827857613563538e-05,
|
|
"learning_rate": 3.3183567088914833e-07,
|
|
"loss": 0.0502,
|
|
"reward": 0.5767329391092062,
|
|
"reward_std": 0.920682892203331,
|
|
"step": 140
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2714.9097595214844,
|
|
"dapo/avg_reward_std": 0.17997434735298157,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26495726979695833,
|
|
"dapo/num_sampling_attempts": 4.875,
|
|
"dapo/sampling_efficiency": 24.82142857142857,
|
|
"dapo/total_prompts_processed": 29.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16114285714285714,
|
|
"grad_norm": 0.03654953092336655,
|
|
"kl": 2.0893290638923645e-05,
|
|
"learning_rate": 3.250000000000001e-07,
|
|
"loss": 0.0808,
|
|
"reward": 0.7222395315766335,
|
|
"reward_std": 0.9689760208129883,
|
|
"step": 141
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1895.9965209960938,
|
|
"dapo/avg_reward_std": 0.24079040033476692,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30476190788405283,
|
|
"dapo/num_sampling_attempts": 4.375,
|
|
"dapo/sampling_efficiency": 36.67207792207792,
|
|
"dapo/total_prompts_processed": 26.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16228571428571428,
|
|
"grad_norm": 0.05263448879122734,
|
|
"kl": 8.018314838409424e-05,
|
|
"learning_rate": 3.182328662904756e-07,
|
|
"loss": 0.0952,
|
|
"reward": 0.5266689900308847,
|
|
"reward_std": 0.9142153859138489,
|
|
"step": 142
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2619.2291717529297,
|
|
"dapo/avg_reward_std": 0.2643248688790106,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34408602887584316,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 32.410714285714285,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16342857142857142,
|
|
"grad_norm": 0.029158689081668854,
|
|
"kl": 3.154575824737549e-05,
|
|
"learning_rate": 3.115363310950578e-07,
|
|
"loss": 0.0032,
|
|
"reward": 0.5475870370864868,
|
|
"reward_std": 0.8940814658999443,
|
|
"step": 143
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2439.340316772461,
|
|
"dapo/avg_reward_std": 0.25194550690979794,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33908046576483497,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 48.86904761904761,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16457142857142856,
|
|
"grad_norm": 0.027842765673995018,
|
|
"kl": 4.0609389543533325e-05,
|
|
"learning_rate": 3.0491243424323783e-07,
|
|
"loss": 0.0,
|
|
"reward": 0.6661859937012196,
|
|
"reward_std": 0.9778606072068214,
|
|
"step": 144
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2299.4166870117188,
|
|
"dapo/avg_reward_std": 0.19899881369358785,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2567567603813635,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 27.96626984126984,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1657142857142857,
|
|
"grad_norm": 0.041895266622304916,
|
|
"kl": 6.861239671707153e-05,
|
|
"learning_rate": 2.9836319343816397e-07,
|
|
"loss": 0.1109,
|
|
"reward": 0.6072739865630865,
|
|
"reward_std": 0.9706787243485451,
|
|
"step": 145
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2448.3993225097656,
|
|
"dapo/avg_reward_std": 0.26682727987116034,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4015151573853059,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 61.25,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16685714285714287,
|
|
"grad_norm": 0.033113960176706314,
|
|
"kl": 6.478279829025269e-05,
|
|
"learning_rate": 2.918906036420294e-07,
|
|
"loss": -0.0725,
|
|
"reward": 0.7111451979726553,
|
|
"reward_std": 0.9747665524482727,
|
|
"step": 146
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2499.4132080078125,
|
|
"dapo/avg_reward_std": 0.23725970940930502,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36904762791735785,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 40.972222222222214,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.168,
|
|
"grad_norm": 0.03699960932135582,
|
|
"kl": 5.050189793109894e-05,
|
|
"learning_rate": 2.854966364683872e-07,
|
|
"loss": 0.0512,
|
|
"reward": 0.5902281412854791,
|
|
"reward_std": 0.9745439067482948,
|
|
"step": 147
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2606.902816772461,
|
|
"dapo/avg_reward_std": 0.3174622275612571,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.46212122250686993,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 50.416666666666664,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.16914285714285715,
|
|
"grad_norm": 0.032203614711761475,
|
|
"kl": 3.288034349679947e-05,
|
|
"learning_rate": 2.791832395815782e-07,
|
|
"loss": 0.0183,
|
|
"reward": 0.4769565463066101,
|
|
"reward_std": 0.9322275221347809,
|
|
"step": 148
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2815.8160247802734,
|
|
"dapo/avg_reward_std": 0.2469456638350631,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2979798046025363,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 35.11904761904762,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1702857142857143,
|
|
"grad_norm": 0.030444171279668808,
|
|
"kl": 3.5978853702545166e-05,
|
|
"learning_rate": 2.729523361034538e-07,
|
|
"loss": 0.056,
|
|
"reward": 0.6807443965226412,
|
|
"reward_std": 0.9815046414732933,
|
|
"step": 149
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2225.520866394043,
|
|
"dapo/avg_reward_std": 0.19231303450134066,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2777777844005161,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 32.18749999999999,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.17142857142857143,
|
|
"grad_norm": 0.03868250176310539,
|
|
"kl": 4.6514905989170074e-05,
|
|
"learning_rate": 2.6680582402757324e-07,
|
|
"loss": -0.037,
|
|
"reward": 0.6887061549350619,
|
|
"reward_std": 0.9610730484127998,
|
|
"step": 150
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3103.3784790039062,
|
|
"dapo/avg_reward_std": 0.20304633464132035,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31547619295971735,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 40.32738095238095,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.17257142857142857,
|
|
"grad_norm": 0.03259337320923805,
|
|
"kl": 7.005780935287476e-05,
|
|
"learning_rate": 2.6074557564105724e-07,
|
|
"loss": 0.0659,
|
|
"reward": 0.5518668536096811,
|
|
"reward_std": 0.9462934136390686,
|
|
"step": 151
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2488.499984741211,
|
|
"dapo/avg_reward_std": 0.20882706064730883,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3177083367481828,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 39.409722222222214,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1737142857142857,
|
|
"grad_norm": 0.030666321516036987,
|
|
"kl": 3.533810377120972e-05,
|
|
"learning_rate": 2.547734369542718e-07,
|
|
"loss": 0.0437,
|
|
"reward": 0.5291262120008469,
|
|
"reward_std": 0.981982946395874,
|
|
"step": 152
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2514.8507080078125,
|
|
"dapo/avg_reward_std": 0.20546393813910308,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3209876600239012,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 35.93749999999999,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.17485714285714285,
|
|
"grad_norm": 0.028674930334091187,
|
|
"kl": 7.952749729156494e-05,
|
|
"learning_rate": 2.488912271385139e-07,
|
|
"loss": -0.0145,
|
|
"reward": 0.5828098729252815,
|
|
"reward_std": 0.9706256464123726,
|
|
"step": 153
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2717.2847290039062,
|
|
"dapo/avg_reward_std": 0.25499844749768574,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.36666667511065804,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 34.791666666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.030772393569350243,
|
|
"kl": 4.854763392359018e-05,
|
|
"learning_rate": 2.4310073797187573e-07,
|
|
"loss": 0.0426,
|
|
"reward": 0.45278373593464494,
|
|
"reward_std": 0.9311749711632729,
|
|
"step": 154
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2762.3055725097656,
|
|
"dapo/avg_reward_std": 0.29779375117758045,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3985507280930229,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 46.25,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.17714285714285713,
|
|
"grad_norm": 0.02795676700770855,
|
|
"kl": 6.116554141044617e-05,
|
|
"learning_rate": 2.374037332934512e-07,
|
|
"loss": -0.017,
|
|
"reward": 0.5571175646036863,
|
|
"reward_std": 0.951450802385807,
|
|
"step": 155
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2260.506950378418,
|
|
"dapo/avg_reward_std": 0.19260793987740862,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.20921986375717408,
|
|
"dapo/num_sampling_attempts": 5.875,
|
|
"dapo/sampling_efficiency": 20.416666666666664,
|
|
"dapo/total_prompts_processed": 35.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1782857142857143,
|
|
"grad_norm": 0.03577401861548424,
|
|
"kl": 4.409998655319214e-05,
|
|
"learning_rate": 2.3180194846605364e-07,
|
|
"loss": 0.0769,
|
|
"reward": 0.6440617088228464,
|
|
"reward_std": 0.9337564334273338,
|
|
"step": 156
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2340.84725189209,
|
|
"dapo/avg_reward_std": 0.27447891732056934,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.40972222946584225,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 46.87499999999999,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.17942857142857144,
|
|
"grad_norm": 0.045233093202114105,
|
|
"kl": 6.485730409622192e-05,
|
|
"learning_rate": 2.2629708984760706e-07,
|
|
"loss": 0.0363,
|
|
"reward": 0.7273098900914192,
|
|
"reward_std": 0.9823846518993378,
|
|
"step": 157
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2282.3819580078125,
|
|
"dapo/avg_reward_std": 0.20623917956101268,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31140351334684774,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 23.680555555555557,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18057142857142858,
|
|
"grad_norm": 0.02890234813094139,
|
|
"kl": 5.996227264404297e-05,
|
|
"learning_rate": 2.2089083427137329e-07,
|
|
"loss": 0.0031,
|
|
"reward": 0.6950137317180634,
|
|
"reward_std": 0.9464666321873665,
|
|
"step": 158
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2021.6284866333008,
|
|
"dapo/avg_reward_std": 0.23576846316054062,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3198198257265864,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 26.96969696969697,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18171428571428572,
|
|
"grad_norm": 0.03477742150425911,
|
|
"kl": 6.712228059768677e-05,
|
|
"learning_rate": 2.1558482853517253e-07,
|
|
"loss": 0.0402,
|
|
"reward": 0.5178025495260954,
|
|
"reward_std": 0.9177478551864624,
|
|
"step": 159
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2372.9931030273438,
|
|
"dapo/avg_reward_std": 0.1955654670794805,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.24206349643922986,
|
|
"dapo/num_sampling_attempts": 5.25,
|
|
"dapo/sampling_efficiency": 22.916666666666664,
|
|
"dapo/total_prompts_processed": 31.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18285714285714286,
|
|
"grad_norm": 0.03023899346590042,
|
|
"kl": 0.00011706352233886719,
|
|
"learning_rate": 2.1038068889975259e-07,
|
|
"loss": -0.023,
|
|
"reward": 0.5155377965420485,
|
|
"reward_std": 0.9538168758153915,
|
|
"step": 160
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2786.184097290039,
|
|
"dapo/avg_reward_std": 0.22358988050152273,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3039215772467501,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 36.354166666666664,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.184,
|
|
"grad_norm": 0.029065359383821487,
|
|
"kl": 7.36340880393982e-05,
|
|
"learning_rate": 2.0528000059645995e-07,
|
|
"loss": 0.0183,
|
|
"reward": 0.5675038225017488,
|
|
"reward_std": 0.9294460043311119,
|
|
"step": 161
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2661.7986183166504,
|
|
"dapo/avg_reward_std": 0.23443660909129727,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3494623740834574,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 39.166666666666664,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18514285714285714,
|
|
"grad_norm": 0.03428042680025101,
|
|
"kl": 7.835030555725098e-05,
|
|
"learning_rate": 2.0028431734436308e-07,
|
|
"loss": 0.0077,
|
|
"reward": 0.6459280159324408,
|
|
"reward_std": 0.961892195045948,
|
|
"step": 162
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2645.9305725097656,
|
|
"dapo/avg_reward_std": 0.2903378981611003,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.39855073133240576,
|
|
"dapo/num_sampling_attempts": 2.875,
|
|
"dapo/sampling_efficiency": 54.166666666666664,
|
|
"dapo/total_prompts_processed": 17.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18628571428571428,
|
|
"grad_norm": 0.026776015758514404,
|
|
"kl": 6.175786256790161e-05,
|
|
"learning_rate": 1.9539516087697517e-07,
|
|
"loss": 0.0499,
|
|
"reward": 0.834372952580452,
|
|
"reward_std": 0.9364972710609436,
|
|
"step": 163
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2940.7604370117188,
|
|
"dapo/avg_reward_std": 0.28692422310511273,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.35555556217829387,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 32.708333333333336,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18742857142857142,
|
|
"grad_norm": 0.03140675649046898,
|
|
"kl": 6.527453660964966e-05,
|
|
"learning_rate": 1.9061402047871833e-07,
|
|
"loss": 0.074,
|
|
"reward": 0.41690353071317077,
|
|
"reward_std": 0.9491114094853401,
|
|
"step": 164
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2281.6284675598145,
|
|
"dapo/avg_reward_std": 0.19226541501634262,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.28921569007284503,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 34.285714285714285,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18857142857142858,
|
|
"grad_norm": 0.044475626200437546,
|
|
"kl": 6.622821092605591e-05,
|
|
"learning_rate": 1.8594235253127372e-07,
|
|
"loss": 0.0216,
|
|
"reward": 0.5352295860648155,
|
|
"reward_std": 0.9716188460588455,
|
|
"step": 165
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2246.774314880371,
|
|
"dapo/avg_reward_std": 0.21395914729048565,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2642276446993758,
|
|
"dapo/num_sampling_attempts": 5.125,
|
|
"dapo/sampling_efficiency": 32.51488095238095,
|
|
"dapo/total_prompts_processed": 30.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.18971428571428572,
|
|
"grad_norm": 0.03659826144576073,
|
|
"kl": 7.368624210357666e-05,
|
|
"learning_rate": 1.8138158006995363e-07,
|
|
"loss": 0.0485,
|
|
"reward": 0.5606641564518213,
|
|
"reward_std": 0.9496459811925888,
|
|
"step": 166
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2340.156265258789,
|
|
"dapo/avg_reward_std": 0.2663822333017985,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3888888974984487,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 30.32738095238095,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19085714285714286,
|
|
"grad_norm": 0.03370486944913864,
|
|
"kl": 0.00011890754103660583,
|
|
"learning_rate": 1.7693309235023127e-07,
|
|
"loss": 0.0107,
|
|
"reward": 0.615155003964901,
|
|
"reward_std": 0.981718622148037,
|
|
"step": 167
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1600.381950378418,
|
|
"dapo/avg_reward_std": 0.2149174999859598,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31481481964389485,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 39.30555555555556,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.040477264672517776,
|
|
"kl": 4.4405460357666016e-05,
|
|
"learning_rate": 1.7259824442455923e-07,
|
|
"loss": 0.0183,
|
|
"reward": 0.7775004804134369,
|
|
"reward_std": 0.9218784719705582,
|
|
"step": 168
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2663.3229370117188,
|
|
"dapo/avg_reward_std": 0.29243687472560187,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4242424314672297,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 45.20833333333333,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19314285714285714,
|
|
"grad_norm": 0.033447615802288055,
|
|
"kl": 6.474554538726807e-05,
|
|
"learning_rate": 1.6837835672960831e-07,
|
|
"loss": 0.0604,
|
|
"reward": 0.6684309486299753,
|
|
"reward_std": 0.9398416355252266,
|
|
"step": 169
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 1823.3020782470703,
|
|
"dapo/avg_reward_std": 0.19836447931624748,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.24324324847878637,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 33.229166666666664,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19428571428571428,
|
|
"grad_norm": 0.050460852682590485,
|
|
"kl": 8.266419172286987e-05,
|
|
"learning_rate": 1.6427471468404952e-07,
|
|
"loss": 0.0797,
|
|
"reward": 0.6385768353939056,
|
|
"reward_std": 0.9705075472593307,
|
|
"step": 170
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2620.312515258789,
|
|
"dapo/avg_reward_std": 0.2494219935992185,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.29901961412499933,
|
|
"dapo/num_sampling_attempts": 4.25,
|
|
"dapo/sampling_efficiency": 30.3125,
|
|
"dapo/total_prompts_processed": 25.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19542857142857142,
|
|
"grad_norm": 0.030058681964874268,
|
|
"kl": 5.9291720390319824e-05,
|
|
"learning_rate": 1.6028856829700258e-07,
|
|
"loss": 0.04,
|
|
"reward": 0.5667276866734028,
|
|
"reward_std": 0.9310731589794159,
|
|
"step": 171
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2728.118064880371,
|
|
"dapo/avg_reward_std": 0.3154246766458858,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.47727273540063336,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 52.82738095238095,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19657142857142856,
|
|
"grad_norm": 0.02854626253247261,
|
|
"kl": 4.601478576660156e-05,
|
|
"learning_rate": 1.5642113178727193e-07,
|
|
"loss": -0.0071,
|
|
"reward": 0.5269420258700848,
|
|
"reward_std": 0.9420886114239693,
|
|
"step": 172
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2000.6111297607422,
|
|
"dapo/avg_reward_std": 0.1943835632221119,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.2657657728807346,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 38.02083333333333,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.1977142857142857,
|
|
"grad_norm": 0.033435527235269547,
|
|
"kl": 6.041303277015686e-05,
|
|
"learning_rate": 1.5267358321348285e-07,
|
|
"loss": -0.0116,
|
|
"reward": 0.6523085497319698,
|
|
"reward_std": 0.9166425243020058,
|
|
"step": 173
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2643.138916015625,
|
|
"dapo/avg_reward_std": 0.31710357325417654,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.46031746977851506,
|
|
"dapo/num_sampling_attempts": 2.625,
|
|
"dapo/sampling_efficiency": 51.45833333333333,
|
|
"dapo/total_prompts_processed": 15.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.19885714285714284,
|
|
"grad_norm": 0.02673209458589554,
|
|
"kl": 0.00010142475366592407,
|
|
"learning_rate": 1.4904706411523448e-07,
|
|
"loss": 0.0252,
|
|
"reward": 0.5322555489838123,
|
|
"reward_std": 0.9057421013712883,
|
|
"step": 174
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2441.3437576293945,
|
|
"dapo/avg_reward_std": 0.30628569194903743,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.38461538977347887,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 41.041666666666664,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.2,
|
|
"grad_norm": 0.04055117443203926,
|
|
"kl": 4.247203469276428e-05,
|
|
"learning_rate": 1.4554267916537495e-07,
|
|
"loss": 0.0974,
|
|
"reward": 0.6256343480199575,
|
|
"reward_std": 0.9141717404127121,
|
|
"step": 175
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2001.5173797607422,
|
|
"dapo/avg_reward_std": 0.28915207616744504,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3817204381189039,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 29.285714285714285,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.20114285714285715,
|
|
"grad_norm": 0.03139885142445564,
|
|
"kl": 8.495151996612549e-05,
|
|
"learning_rate": 1.4216149583350755e-07,
|
|
"loss": 0.0178,
|
|
"reward": 0.5467482833191752,
|
|
"reward_std": 0.9077746942639351,
|
|
"step": 176
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2707.1875,
|
|
"dapo/avg_reward_std": 0.2716821462943636,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3620689732247385,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 37.5,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.2022857142857143,
|
|
"grad_norm": 0.027195578441023827,
|
|
"kl": 3.4984201192855835e-05,
|
|
"learning_rate": 1.3890454406082956e-07,
|
|
"loss": 0.0243,
|
|
"reward": 0.4738291520625353,
|
|
"reward_std": 0.9582962840795517,
|
|
"step": 177
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2927.2534790039062,
|
|
"dapo/avg_reward_std": 0.2845180779695511,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3750000127724239,
|
|
"dapo/num_sampling_attempts": 3.5,
|
|
"dapo/sampling_efficiency": 34.49404761904761,
|
|
"dapo/total_prompts_processed": 21.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.20342857142857143,
|
|
"grad_norm": 0.0315893292427063,
|
|
"kl": 9.309500455856323e-05,
|
|
"learning_rate": 1.3577281594640182e-07,
|
|
"loss": 0.067,
|
|
"reward": 0.52550208568573,
|
|
"reward_std": 0.9910342618823051,
|
|
"step": 178
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2337.701400756836,
|
|
"dapo/avg_reward_std": 0.18291032314300537,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.25438597092502996,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 32.81249999999999,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.20457142857142857,
|
|
"grad_norm": 0.031005509197711945,
|
|
"kl": 9.676814079284668e-05,
|
|
"learning_rate": 1.3276726544494571e-07,
|
|
"loss": 0.0165,
|
|
"reward": 0.6187671273946762,
|
|
"reward_std": 0.9665273353457451,
|
|
"step": 179
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2257.3958892822266,
|
|
"dapo/avg_reward_std": 0.20009312199221718,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30092593158284825,
|
|
"dapo/num_sampling_attempts": 4.5,
|
|
"dapo/sampling_efficiency": 40.95238095238095,
|
|
"dapo/total_prompts_processed": 27.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.2057142857142857,
|
|
"grad_norm": 0.0394003801047802,
|
|
"kl": 6.996467709541321e-05,
|
|
"learning_rate": 1.2988880807625927e-07,
|
|
"loss": 0.0627,
|
|
"reward": 0.7572303153574467,
|
|
"reward_std": 0.9510952234268188,
|
|
"step": 180
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2533.9375534057617,
|
|
"dapo/avg_reward_std": 0.37206994990507763,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.5740740895271301,
|
|
"dapo/num_sampling_attempts": 2.25,
|
|
"dapo/sampling_efficiency": 51.041666666666664,
|
|
"dapo/total_prompts_processed": 13.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.20685714285714285,
|
|
"grad_norm": 0.03264293819665909,
|
|
"kl": 4.2844563722610474e-05,
|
|
"learning_rate": 1.2713832064634125e-07,
|
|
"loss": 0.0513,
|
|
"reward": 0.7092031128704548,
|
|
"reward_std": 1.0104939341545105,
|
|
"step": 181
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2425.8055572509766,
|
|
"dapo/avg_reward_std": 0.275991202547,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.384615390919722,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 41.041666666666664,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.033197954297065735,
|
|
"kl": 6.585032679140568e-05,
|
|
"learning_rate": 1.2451664098030743e-07,
|
|
"loss": 0.0327,
|
|
"reward": 0.5725661776959896,
|
|
"reward_std": 0.9082557633519173,
|
|
"step": 182
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2288.0799255371094,
|
|
"dapo/avg_reward_std": 0.31956043162129144,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4318181872367859,
|
|
"dapo/num_sampling_attempts": 2.75,
|
|
"dapo/sampling_efficiency": 52.083333333333336,
|
|
"dapo/total_prompts_processed": 16.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.20914285714285713,
|
|
"grad_norm": 0.0300610288977623,
|
|
"kl": 9.128451347351074e-05,
|
|
"learning_rate": 1.220245676671809e-07,
|
|
"loss": 0.0567,
|
|
"reward": 0.7111962893977761,
|
|
"reward_std": 0.9172193482518196,
|
|
"step": 183
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2212.138900756836,
|
|
"dapo/avg_reward_std": 0.31106447339057924,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4066666769981384,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 40.97222222222222,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.2102857142857143,
|
|
"grad_norm": 0.03711786866188049,
|
|
"kl": 9.056925773620605e-05,
|
|
"learning_rate": 1.1966285981663407e-07,
|
|
"loss": 0.0405,
|
|
"reward": 0.505124656483531,
|
|
"reward_std": 0.9274496361613274,
|
|
"step": 184
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2350.8820037841797,
|
|
"dapo/avg_reward_std": 0.21689824704770688,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3209876600239012,
|
|
"dapo/num_sampling_attempts": 3.375,
|
|
"dapo/sampling_efficiency": 47.22222222222222,
|
|
"dapo/total_prompts_processed": 20.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21142857142857144,
|
|
"grad_norm": 0.03581295162439346,
|
|
"kl": 0.00011820532381534576,
|
|
"learning_rate": 1.1743223682775649e-07,
|
|
"loss": 0.0582,
|
|
"reward": 0.6189532484859228,
|
|
"reward_std": 0.92426348477602,
|
|
"step": 185
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2414.6770629882812,
|
|
"dapo/avg_reward_std": 0.26570350316263014,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33333333749924937,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 41.785714285714285,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21257142857142858,
|
|
"grad_norm": 0.04000677913427353,
|
|
"kl": 5.166977643966675e-05,
|
|
"learning_rate": 1.1533337816991931e-07,
|
|
"loss": 0.0842,
|
|
"reward": 0.6384202986955643,
|
|
"reward_std": 0.9535242542624474,
|
|
"step": 186
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2179.180564880371,
|
|
"dapo/avg_reward_std": 0.267340756695846,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.362068974766238,
|
|
"dapo/num_sampling_attempts": 3.625,
|
|
"dapo/sampling_efficiency": 31.25,
|
|
"dapo/total_prompts_processed": 21.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21371428571428572,
|
|
"grad_norm": 0.03956381976604462,
|
|
"kl": 7.00727105140686e-05,
|
|
"learning_rate": 1.1336692317580158e-07,
|
|
"loss": 0.0838,
|
|
"reward": 0.6583898914977908,
|
|
"reward_std": 0.9566742405295372,
|
|
"step": 187
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2340.65975189209,
|
|
"dapo/avg_reward_std": 0.19622711837291718,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.31770833721384406,
|
|
"dapo/num_sampling_attempts": 4.0,
|
|
"dapo/sampling_efficiency": 55.51136363636363,
|
|
"dapo/total_prompts_processed": 24.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21485714285714286,
|
|
"grad_norm": 0.03709344565868378,
|
|
"kl": 9.210407733917236e-05,
|
|
"learning_rate": 1.1153347084664419e-07,
|
|
"loss": 0.0542,
|
|
"reward": 0.5126780550926924,
|
|
"reward_std": 0.9266727864742279,
|
|
"step": 188
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 3183.7395782470703,
|
|
"dapo/avg_reward_std": 0.19985724004303537,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.23577236293292628,
|
|
"dapo/num_sampling_attempts": 5.125,
|
|
"dapo/sampling_efficiency": 23.1547619047619,
|
|
"dapo/total_prompts_processed": 30.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.216,
|
|
"grad_norm": 0.025569448247551918,
|
|
"kl": 3.505079075694084e-05,
|
|
"learning_rate": 1.0983357966978745e-07,
|
|
"loss": 0.0446,
|
|
"reward": 0.524140851572156,
|
|
"reward_std": 0.9313696026802063,
|
|
"step": 189
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2137.0764083862305,
|
|
"dapo/avg_reward_std": 0.2310014808177948,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.33333334028720857,
|
|
"dapo/num_sampling_attempts": 3.125,
|
|
"dapo/sampling_efficiency": 49.479166666666664,
|
|
"dapo/total_prompts_processed": 18.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21714285714285714,
|
|
"grad_norm": 0.049144402146339417,
|
|
"kl": 0.00011414289474487305,
|
|
"learning_rate": 1.0826776744855121e-07,
|
|
"loss": 0.0597,
|
|
"reward": 0.6003488898277283,
|
|
"reward_std": 0.9967769384384155,
|
|
"step": 190
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2711.965301513672,
|
|
"dapo/avg_reward_std": 0.27090639670689903,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3388888930281003,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 42.604166666666664,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21828571428571428,
|
|
"grad_norm": 0.03207146376371384,
|
|
"kl": 7.285922765731812e-05,
|
|
"learning_rate": 1.068365111445064e-07,
|
|
"loss": 0.0774,
|
|
"reward": 0.5157463289797306,
|
|
"reward_std": 0.9445067569613457,
|
|
"step": 191
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2634.809066772461,
|
|
"dapo/avg_reward_std": 0.23276896492854968,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.29729730414377675,
|
|
"dapo/num_sampling_attempts": 4.625,
|
|
"dapo/sampling_efficiency": 31.38888888888889,
|
|
"dapo/total_prompts_processed": 27.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.21942857142857142,
|
|
"grad_norm": 0.026157336309552193,
|
|
"kl": 4.951097071170807e-05,
|
|
"learning_rate": 1.0554024673218806e-07,
|
|
"loss": 0.0183,
|
|
"reward": 0.4917615167796612,
|
|
"reward_std": 0.932147391140461,
|
|
"step": 192
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2687.6562423706055,
|
|
"dapo/avg_reward_std": 0.1842694640159607,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3000000034769376,
|
|
"dapo/num_sampling_attempts": 3.75,
|
|
"dapo/sampling_efficiency": 37.20238095238095,
|
|
"dapo/total_prompts_processed": 22.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22057142857142858,
|
|
"grad_norm": 0.036305345594882965,
|
|
"kl": 5.197897553443909e-05,
|
|
"learning_rate": 1.0437936906629334e-07,
|
|
"loss": 0.0737,
|
|
"reward": 0.8177419528365135,
|
|
"reward_std": 0.9367102533578873,
|
|
"step": 193
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2567.093780517578,
|
|
"dapo/avg_reward_std": 0.2292217422615398,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.30808081364992895,
|
|
"dapo/num_sampling_attempts": 4.125,
|
|
"dapo/sampling_efficiency": 36.284722222222214,
|
|
"dapo/total_prompts_processed": 24.75,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22171428571428572,
|
|
"grad_norm": 0.03788081929087639,
|
|
"kl": 8.841603994369507e-05,
|
|
"learning_rate": 1.0335423176140511e-07,
|
|
"loss": 0.0745,
|
|
"reward": 0.4994155182503164,
|
|
"reward_std": 0.9395617768168449,
|
|
"step": 194
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2132.22225189209,
|
|
"dapo/avg_reward_std": 0.23152823698136113,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.34408602791447795,
|
|
"dapo/num_sampling_attempts": 3.875,
|
|
"dapo/sampling_efficiency": 46.800595238095234,
|
|
"dapo/total_prompts_processed": 23.25,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22285714285714286,
|
|
"grad_norm": 0.03888849914073944,
|
|
"kl": 7.880479097366333e-05,
|
|
"learning_rate": 1.0246514708427701e-07,
|
|
"loss": 0.0078,
|
|
"reward": 0.4982965085655451,
|
|
"reward_std": 0.9277759939432144,
|
|
"step": 195
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2242.8437881469727,
|
|
"dapo/avg_reward_std": 0.2252171416031687,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.27192983148913635,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 42.49999999999999,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.03532218188047409,
|
|
"kl": 8.079037070274353e-05,
|
|
"learning_rate": 1.017123858587145e-07,
|
|
"loss": -0.0036,
|
|
"reward": 0.6249313289299607,
|
|
"reward_std": 0.9415610581636429,
|
|
"step": 196
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2186.913246154785,
|
|
"dapo/avg_reward_std": 0.2062954322287911,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.26754386566187205,
|
|
"dapo/num_sampling_attempts": 4.75,
|
|
"dapo/sampling_efficiency": 27.549603174603174,
|
|
"dapo/total_prompts_processed": 28.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22514285714285714,
|
|
"grad_norm": 0.05644107237458229,
|
|
"kl": 0.00012712180614471436,
|
|
"learning_rate": 1.0109617738307911e-07,
|
|
"loss": 0.0266,
|
|
"reward": 0.6248354203999043,
|
|
"reward_std": 0.9687103852629662,
|
|
"step": 197
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2853.7430725097656,
|
|
"dapo/avg_reward_std": 0.2791443226429132,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.41666667277996355,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 45.3125,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22628571428571428,
|
|
"grad_norm": 0.025631451979279518,
|
|
"kl": 7.095187902450562e-05,
|
|
"learning_rate": 1.0061670936044178e-07,
|
|
"loss": 0.0195,
|
|
"reward": 0.683892990462482,
|
|
"reward_std": 0.9487637504935265,
|
|
"step": 198
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2660.218780517578,
|
|
"dapo/avg_reward_std": 0.24377418825259575,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.3910256469478974,
|
|
"dapo/num_sampling_attempts": 3.25,
|
|
"dapo/sampling_efficiency": 56.597222222222214,
|
|
"dapo/total_prompts_processed": 19.5,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22742857142857142,
|
|
"grad_norm": 0.034018680453300476,
|
|
"kl": 6.149709224700928e-05,
|
|
"learning_rate": 1.002741278414069e-07,
|
|
"loss": 0.0404,
|
|
"reward": 0.565577644854784,
|
|
"reward_std": 0.9079905152320862,
|
|
"step": 199
|
|
},
|
|
{
|
|
"clip_fraction": 0.0,
|
|
"completion_length": 2421.875015258789,
|
|
"dapo/avg_reward_std": 0.3100067762037118,
|
|
"dapo/filter_reward_index": 0.0,
|
|
"dapo/kept_prompts_ratio": 0.4027777823309104,
|
|
"dapo/num_sampling_attempts": 3.0,
|
|
"dapo/sampling_efficiency": 45.83333333333333,
|
|
"dapo/total_prompts_processed": 18.0,
|
|
"dapo/valid_prompts_collected": 6.0,
|
|
"epoch": 0.22857142857142856,
|
|
"grad_norm": 0.030885452404618263,
|
|
"kl": 7.659196853637695e-05,
|
|
"learning_rate": 1.0006853717962393e-07,
|
|
"loss": 0.0132,
|
|
"reward": 0.5110834892839193,
|
|
"reward_std": 0.8930082246661186,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.22857142857142856,
|
|
"step": 200,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.009447227440541611,
|
|
"train_runtime": 101500.2967,
|
|
"train_samples_per_second": 0.095,
|
|
"train_steps_per_second": 0.002
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 200,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 10,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 6,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|