Files
Qwen2.5-0.5B-Instruct-Gensy…/trainer_state.json

234 lines
8.2 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 232.21875,
"epoch": 0.5714285714285714,
"grad_norm": 44.04344940185547,
"kl": 0.0,
"learning_rate": 5e-07,
"loss": 0.0,
"reward": 2.9121293793432415,
"reward_std": 0.6280596783617511,
"rewards/concensus_correctness_reward_func": 0.5593750020489097,
"rewards/consensus_reward_func": 0.9375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.6154730841517448,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.15625,
"rewards/xmlcount_reward_func": 0.6435312470421195,
"step": 2
},
{
"completion_length": 178.70833333333334,
"epoch": 1.0,
"grad_norm": 146.21438598632812,
"kl": 8.268213170580566,
"learning_rate": 4.864543104251586e-07,
"loss": 0.0062,
"reward": 5.387973050276439,
"reward_std": 0.6165593440334002,
"rewards/concensus_correctness_reward_func": 1.5517499844233196,
"rewards/consensus_reward_func": 1.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.16666666666666666,
"rewards/question_recreation_reward_func": 0.8114312589168549,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.25,
"rewards/xmlcount_reward_func": 1.108125001192093,
"step": 4
},
{
"completion_length": 150.28125,
"epoch": 1.5714285714285714,
"grad_norm": 19.16786003112793,
"kl": 2.884129573008977,
"learning_rate": 4.472851273490984e-07,
"loss": 0.0029,
"reward": 5.311543390154839,
"reward_std": 0.7719176085665822,
"rewards/concensus_correctness_reward_func": 1.4498750008642673,
"rewards/consensus_reward_func": 1.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.71429343521595,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.359375,
"rewards/xmlcount_reward_func": 1.163000002503395,
"step": 6
},
{
"completion_length": 184.33333333333334,
"epoch": 2.0,
"grad_norm": 18.322921752929688,
"kl": 1.2294259141975392,
"learning_rate": 3.867370395306068e-07,
"loss": 0.0009,
"reward": 4.496874541044235,
"reward_std": 0.8491996126249433,
"rewards/concensus_correctness_reward_func": 1.015833326925834,
"rewards/consensus_reward_func": 1.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.6787495116392771,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.25,
"rewards/xmlcount_reward_func": 1.0522916664679844,
"step": 8
},
{
"completion_length": 154.625,
"epoch": 2.571428571428571,
"grad_norm": 17.83030128479004,
"kl": 8.403996711946093,
"learning_rate": 3.1137137178519977e-07,
"loss": 0.0084,
"reward": 4.695109188556671,
"reward_std": 0.9860417204909027,
"rewards/concensus_correctness_reward_func": 1.0651875026524067,
"rewards/consensus_reward_func": 1.375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.7041404494084418,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.375,
"rewards/xmlcount_reward_func": 1.17578125,
"step": 10
},
{
"completion_length": 134.54166666666666,
"epoch": 3.0,
"grad_norm": 17.385499954223633,
"kl": 28912.023177654482,
"learning_rate": 2.2935516363191693e-07,
"loss": 21.684,
"reward": 5.548654953638713,
"reward_std": 0.5412371944015225,
"rewards/concensus_correctness_reward_func": 1.4962499924004078,
"rewards/consensus_reward_func": 1.5833333333333333,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.16666666666666666,
"rewards/question_recreation_reward_func": 0.8024049550294876,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.3125,
"rewards/xmlcount_reward_func": 1.1875,
"step": 12
},
{
"completion_length": 140.71875,
"epoch": 3.571428571428571,
"grad_norm": 1344.9619140625,
"kl": 6139.031279045157,
"learning_rate": 1.4957614383675767e-07,
"loss": 6.139,
"reward": 5.497947037220001,
"reward_std": 0.4656379229563754,
"rewards/concensus_correctness_reward_func": 1.4404374985024333,
"rewards/consensus_reward_func": 1.5625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.8114158157259226,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.359375,
"rewards/xmlcount_reward_func": 1.19921875,
"step": 14
},
{
"completion_length": 160.54166666666666,
"epoch": 4.0,
"grad_norm": 12.484821319580078,
"kl": 8.58236723113805,
"learning_rate": 8.067960709356478e-08,
"loss": 0.0064,
"reward": 4.575502196947734,
"reward_std": 1.0911046511416014,
"rewards/concensus_correctness_reward_func": 0.9780833274126053,
"rewards/consensus_reward_func": 1.3333333333333333,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.7998771816492081,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.2916666666666667,
"rewards/xmlcount_reward_func": 1.1725416680177052,
"step": 16
},
{
"completion_length": 138.53125,
"epoch": 4.571428571428571,
"grad_norm": 572.0084228515625,
"kl": 28.125015974976122,
"learning_rate": 3.013156219837776e-08,
"loss": 0.0281,
"reward": 5.397134527564049,
"reward_std": 0.664117572363466,
"rewards/concensus_correctness_reward_func": 1.403562496881932,
"rewards/consensus_reward_func": 1.6875,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0625,
"rewards/question_recreation_reward_func": 0.7240407671779394,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.34375,
"rewards/xmlcount_reward_func": 1.17578125,
"step": 18
},
{
"completion_length": 182.375,
"epoch": 5.0,
"grad_norm": 16.89112091064453,
"kl": 497.25228943574865,
"learning_rate": 3.4096741493194193e-09,
"loss": 0.3729,
"reward": 4.701884349187215,
"reward_std": 0.9557839000287155,
"rewards/concensus_correctness_reward_func": 1.163333331545194,
"rewards/consensus_reward_func": 1.25,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.7833427041769028,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.3541666666666667,
"rewards/xmlcount_reward_func": 1.1510416666666667,
"step": 20
},
{
"epoch": 5.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 2.8248958706099074,
"train_runtime": 118.0721,
"train_samples_per_second": 2.71,
"train_steps_per_second": 0.169
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}