Files
Qwen2.5-0.5B-Instruct-Gensy…/trainer_state.json

234 lines
8.0 KiB
JSON
Raw Permalink Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 375.875,
"epoch": 0.1,
"grad_norm": 34.61153793334961,
"kl": 0.0,
"learning_rate": 4.965903258506806e-07,
"loss": 0.0,
"reward": 0.3664499084734416,
"reward_std": 0.37731685693142936,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.24698116456056596,
"rewards/soft_format_reward_func": 0.015625,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.10384375043213367,
"step": 2
},
{
"completion_length": 356.71875,
"epoch": 0.2,
"grad_norm": 27.17987060546875,
"kl": 0.0014200315781636164,
"learning_rate": 4.698684378016222e-07,
"loss": 0.0,
"reward": 0.44327147863805294,
"reward_std": 0.5475165799725801,
"rewards/concensus_correctness_reward_func": 0.015687499195337296,
"rewards/consensus_reward_func": 0.0625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.27499022823758423,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.02759375609457493,
"step": 4
},
{
"completion_length": 364.78125,
"epoch": 0.3,
"grad_norm": 8.681589126586914,
"kl": 0.01875807526448625,
"learning_rate": 4.193203929064353e-07,
"loss": 0.0,
"reward": 0.3844468754250556,
"reward_std": 0.26774020673474297,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.31691562850028276,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.06753125367686152,
"step": 6
},
{
"completion_length": 363.6875,
"epoch": 0.4,
"grad_norm": 76.04808807373047,
"kl": 0.008640145704703173,
"learning_rate": 3.5042385616324236e-07,
"loss": 0.0,
"reward": 0.1568439636612311,
"reward_std": 0.5598289684858173,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.18931270475150086,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.09496874565957114,
"step": 8
},
{
"completion_length": 395.5625,
"epoch": 0.5,
"grad_norm": 8.20077133178711,
"kl": 0.00585838263577898,
"learning_rate": 2.706448363680831e-07,
"loss": 0.0,
"reward": 0.44814145751297474,
"reward_std": 0.4613042630953714,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.2386414643842727,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.147000000346452,
"step": 10
},
{
"completion_length": 333.625,
"epoch": 0.6,
"grad_norm": 9.751823425292969,
"kl": 0.9474406025801727,
"learning_rate": 1.886286282148002e-07,
"loss": 0.0009,
"reward": 0.49279772784211673,
"reward_std": 0.411659850156866,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.3130477310915012,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.11725000198930502,
"step": 12
},
{
"completion_length": 257.25,
"epoch": 0.7,
"grad_norm": 7.342770099639893,
"kl": 0.00215080863199546,
"learning_rate": 1.1326296046939333e-07,
"loss": 0.0,
"reward": 0.44488345994614065,
"reward_std": 0.318172043771483,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.19450845930259675,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": 0.18787499610334635,
"step": 14
},
{
"completion_length": 390.46875,
"epoch": 0.8,
"grad_norm": 4.781615257263184,
"kl": 0.0012646604518522508,
"learning_rate": 5.271487265090163e-08,
"loss": 0.0,
"reward": 0.4826530911959708,
"reward_std": 0.7335095015587285,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.42434057663194835,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.1916874992166413,
"step": 16
},
{
"completion_length": 397.03125,
"epoch": 0.9,
"grad_norm": 8.631686210632324,
"kl": 0.23009972504951293,
"learning_rate": 1.3545689574841341e-08,
"loss": 0.0002,
"reward": 1.1463897689245641,
"reward_std": 1.4765515620936185,
"rewards/concensus_correctness_reward_func": 0.7297499999403954,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.1875,
"rewards/question_recreation_reward_func": 0.2630148070747964,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.03387500322423875,
"step": 18
},
{
"completion_length": 307.84375,
"epoch": 1.0,
"grad_norm": 68.26083374023438,
"kl": 3.183381193990499,
"learning_rate": 0.0,
"loss": 0.0032,
"reward": -0.019438669085502625,
"reward_std": 0.5264092059223913,
"rewards/concensus_correctness_reward_func": 0.0,
"rewards/consensus_reward_func": 0.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.16081133193802088,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.0,
"rewards/xmlcount_reward_func": -0.18025000300258398,
"step": 20
},
{
"epoch": 1.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 0.0004398912084980111,
"train_runtime": 308.8876,
"train_samples_per_second": 1.036,
"train_steps_per_second": 0.065
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}