234 lines
8.2 KiB
JSON
234 lines
8.2 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 5.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 20,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"completion_length": 232.21875,
|
||
|
|
"epoch": 0.5714285714285714,
|
||
|
|
"grad_norm": 44.04344940185547,
|
||
|
|
"kl": 0.0,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"loss": 0.0,
|
||
|
|
"reward": 2.9121293793432415,
|
||
|
|
"reward_std": 0.6280596783617511,
|
||
|
|
"rewards/concensus_correctness_reward_func": 0.5593750020489097,
|
||
|
|
"rewards/consensus_reward_func": 0.9375,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0,
|
||
|
|
"rewards/question_recreation_reward_func": 0.6154730841517448,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.15625,
|
||
|
|
"rewards/xmlcount_reward_func": 0.6435312470421195,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 178.70833333333334,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 146.21438598632812,
|
||
|
|
"kl": 8.268213170580566,
|
||
|
|
"learning_rate": 4.864543104251586e-07,
|
||
|
|
"loss": 0.0062,
|
||
|
|
"reward": 5.387973050276439,
|
||
|
|
"reward_std": 0.6165593440334002,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.5517499844233196,
|
||
|
|
"rewards/consensus_reward_func": 1.5,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.16666666666666666,
|
||
|
|
"rewards/question_recreation_reward_func": 0.8114312589168549,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.25,
|
||
|
|
"rewards/xmlcount_reward_func": 1.108125001192093,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 150.28125,
|
||
|
|
"epoch": 1.5714285714285714,
|
||
|
|
"grad_norm": 19.16786003112793,
|
||
|
|
"kl": 2.884129573008977,
|
||
|
|
"learning_rate": 4.472851273490984e-07,
|
||
|
|
"loss": 0.0029,
|
||
|
|
"reward": 5.311543390154839,
|
||
|
|
"reward_std": 0.7719176085665822,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.4498750008642673,
|
||
|
|
"rewards/consensus_reward_func": 1.5,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.125,
|
||
|
|
"rewards/question_recreation_reward_func": 0.71429343521595,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.359375,
|
||
|
|
"rewards/xmlcount_reward_func": 1.163000002503395,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 184.33333333333334,
|
||
|
|
"epoch": 2.0,
|
||
|
|
"grad_norm": 18.322921752929688,
|
||
|
|
"kl": 1.2294259141975392,
|
||
|
|
"learning_rate": 3.867370395306068e-07,
|
||
|
|
"loss": 0.0009,
|
||
|
|
"reward": 4.496874541044235,
|
||
|
|
"reward_std": 0.8491996126249433,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.015833326925834,
|
||
|
|
"rewards/consensus_reward_func": 1.5,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0,
|
||
|
|
"rewards/question_recreation_reward_func": 0.6787495116392771,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.25,
|
||
|
|
"rewards/xmlcount_reward_func": 1.0522916664679844,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 154.625,
|
||
|
|
"epoch": 2.571428571428571,
|
||
|
|
"grad_norm": 17.83030128479004,
|
||
|
|
"kl": 8.403996711946093,
|
||
|
|
"learning_rate": 3.1137137178519977e-07,
|
||
|
|
"loss": 0.0084,
|
||
|
|
"reward": 4.695109188556671,
|
||
|
|
"reward_std": 0.9860417204909027,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.0651875026524067,
|
||
|
|
"rewards/consensus_reward_func": 1.375,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0,
|
||
|
|
"rewards/question_recreation_reward_func": 0.7041404494084418,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.375,
|
||
|
|
"rewards/xmlcount_reward_func": 1.17578125,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 134.54166666666666,
|
||
|
|
"epoch": 3.0,
|
||
|
|
"grad_norm": 17.385499954223633,
|
||
|
|
"kl": 28912.023177654482,
|
||
|
|
"learning_rate": 2.2935516363191693e-07,
|
||
|
|
"loss": 21.684,
|
||
|
|
"reward": 5.548654953638713,
|
||
|
|
"reward_std": 0.5412371944015225,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.4962499924004078,
|
||
|
|
"rewards/consensus_reward_func": 1.5833333333333333,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.16666666666666666,
|
||
|
|
"rewards/question_recreation_reward_func": 0.8024049550294876,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.3125,
|
||
|
|
"rewards/xmlcount_reward_func": 1.1875,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 140.71875,
|
||
|
|
"epoch": 3.571428571428571,
|
||
|
|
"grad_norm": 1344.9619140625,
|
||
|
|
"kl": 6139.031279045157,
|
||
|
|
"learning_rate": 1.4957614383675767e-07,
|
||
|
|
"loss": 6.139,
|
||
|
|
"reward": 5.497947037220001,
|
||
|
|
"reward_std": 0.4656379229563754,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.4404374985024333,
|
||
|
|
"rewards/consensus_reward_func": 1.5625,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.125,
|
||
|
|
"rewards/question_recreation_reward_func": 0.8114158157259226,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.359375,
|
||
|
|
"rewards/xmlcount_reward_func": 1.19921875,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 160.54166666666666,
|
||
|
|
"epoch": 4.0,
|
||
|
|
"grad_norm": 12.484821319580078,
|
||
|
|
"kl": 8.58236723113805,
|
||
|
|
"learning_rate": 8.067960709356478e-08,
|
||
|
|
"loss": 0.0064,
|
||
|
|
"reward": 4.575502196947734,
|
||
|
|
"reward_std": 1.0911046511416014,
|
||
|
|
"rewards/concensus_correctness_reward_func": 0.9780833274126053,
|
||
|
|
"rewards/consensus_reward_func": 1.3333333333333333,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0,
|
||
|
|
"rewards/question_recreation_reward_func": 0.7998771816492081,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.2916666666666667,
|
||
|
|
"rewards/xmlcount_reward_func": 1.1725416680177052,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 138.53125,
|
||
|
|
"epoch": 4.571428571428571,
|
||
|
|
"grad_norm": 572.0084228515625,
|
||
|
|
"kl": 28.125015974976122,
|
||
|
|
"learning_rate": 3.013156219837776e-08,
|
||
|
|
"loss": 0.0281,
|
||
|
|
"reward": 5.397134527564049,
|
||
|
|
"reward_std": 0.664117572363466,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.403562496881932,
|
||
|
|
"rewards/consensus_reward_func": 1.6875,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0625,
|
||
|
|
"rewards/question_recreation_reward_func": 0.7240407671779394,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.34375,
|
||
|
|
"rewards/xmlcount_reward_func": 1.17578125,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"completion_length": 182.375,
|
||
|
|
"epoch": 5.0,
|
||
|
|
"grad_norm": 16.89112091064453,
|
||
|
|
"kl": 497.25228943574865,
|
||
|
|
"learning_rate": 3.4096741493194193e-09,
|
||
|
|
"loss": 0.3729,
|
||
|
|
"reward": 4.701884349187215,
|
||
|
|
"reward_std": 0.9557839000287155,
|
||
|
|
"rewards/concensus_correctness_reward_func": 1.163333331545194,
|
||
|
|
"rewards/consensus_reward_func": 1.25,
|
||
|
|
"rewards/cumulative_reward_2": 0.0,
|
||
|
|
"rewards/final_correctness_reward_func": 0.0,
|
||
|
|
"rewards/question_recreation_reward_func": 0.7833427041769028,
|
||
|
|
"rewards/soft_format_reward_func": 0.0,
|
||
|
|
"rewards/strict_format_reward_func": 0.3541666666666667,
|
||
|
|
"rewards/xmlcount_reward_func": 1.1510416666666667,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 5.0,
|
||
|
|
"step": 20,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 2.8248958706099074,
|
||
|
|
"train_runtime": 118.0721,
|
||
|
|
"train_samples_per_second": 2.71,
|
||
|
|
"train_steps_per_second": 0.169
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 2,
|
||
|
|
"max_steps": 20,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 7,
|
||
|
|
"save_steps": 25,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 2,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|