Files
Qwen2.5-0.5B-Instruct-Gensy…/trainer_state.json

234 lines
8.0 KiB
JSON
Raw Normal View History

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 228.375,
"epoch": 0.1,
"grad_norm": 57.555362701416016,
"kl": 0.0,
"learning_rate": 4.965903258506806e-07,
"loss": -0.0,
"reward": 4.152295699343085,
"reward_std": 0.8311166568892077,
"rewards/concensus_correctness_reward_func": 1.2107499986886978,
"rewards/consensus_reward_func": 1.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.7966707283630967,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.21875,
"rewards/xmlcount_reward_func": 0.6761250011622906,
"step": 2
},
{
"completion_length": 201.75,
"epoch": 0.2,
"grad_norm": 23.945329666137695,
"kl": 0.19989765621721745,
"learning_rate": 4.698684378016222e-07,
"loss": 0.0002,
"reward": 7.065919041633606,
"reward_std": 0.5927391643635929,
"rewards/concensus_correctness_reward_func": 2.1215000078082085,
"rewards/consensus_reward_func": 1.875,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.5,
"rewards/question_recreation_reward_func": 0.990481548011303,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.40625,
"rewards/xmlcount_reward_func": 1.1726875007152557,
"step": 4
},
{
"completion_length": 178.375,
"epoch": 0.3,
"grad_norm": 2180.853271484375,
"kl": 5311555.980729777,
"learning_rate": 4.193203929064353e-07,
"loss": 5311.5566,
"reward": 4.848113030195236,
"reward_std": 2.48440220952034,
"rewards/concensus_correctness_reward_func": 1.3193749785423279,
"rewards/consensus_reward_func": 1.375,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.7886755615472794,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.3125,
"rewards/xmlcount_reward_func": 1.0525624975562096,
"step": 6
},
{
"completion_length": 201.125,
"epoch": 0.4,
"grad_norm": 52907.8203125,
"kl": 1112.07909232378,
"learning_rate": 3.5042385616324236e-07,
"loss": 1.1121,
"reward": 5.95790758728981,
"reward_std": 1.2487200200557709,
"rewards/concensus_correctness_reward_func": 1.6839999929070473,
"rewards/consensus_reward_func": 1.5,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.9379700720310211,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.40625,
"rewards/xmlcount_reward_func": 1.1796875,
"step": 8
},
{
"completion_length": 188.875,
"epoch": 0.5,
"grad_norm": 2865887744.0,
"kl": 40746333.48964184,
"learning_rate": 2.706448363680831e-07,
"loss": 40746.332,
"reward": 7.091725826263428,
"reward_std": 0.6974060980137438,
"rewards/concensus_correctness_reward_func": 2.4067499935626984,
"rewards/consensus_reward_func": 2.0,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.9519133418798447,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.34375,
"rewards/xmlcount_reward_func": 1.1393124982714653,
"step": 10
},
{
"completion_length": 179.6875,
"epoch": 0.6,
"grad_norm": 2026.5494384765625,
"kl": 44.08751246146858,
"learning_rate": 1.886286282148002e-07,
"loss": 0.0441,
"reward": 6.057031333446503,
"reward_std": 1.4806374236941338,
"rewards/concensus_correctness_reward_func": 1.6842499673366547,
"rewards/consensus_reward_func": 1.625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.96484375,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.375,
"rewards/xmlcount_reward_func": 1.1579374969005585,
"step": 12
},
{
"completion_length": 186.75,
"epoch": 0.7,
"grad_norm": 3908203.25,
"kl": 98680.71119815856,
"learning_rate": 1.1326296046939333e-07,
"loss": 98.6807,
"reward": 6.111200124025345,
"reward_std": 0.8898124806582928,
"rewards/concensus_correctness_reward_func": 1.8071250086650252,
"rewards/consensus_reward_func": 1.75,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.8432626910507679,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.3125,
"rewards/xmlcount_reward_func": 1.1483124941587448,
"step": 14
},
{
"completion_length": 174.1875,
"epoch": 0.8,
"grad_norm": 751.8195190429688,
"kl": 35.69426943734288,
"learning_rate": 5.271487265090163e-08,
"loss": 0.0357,
"reward": 6.600282669067383,
"reward_std": 0.7876708060503006,
"rewards/concensus_correctness_reward_func": 1.9124999642372131,
"rewards/consensus_reward_func": 1.875,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.25,
"rewards/question_recreation_reward_func": 0.8909076675772667,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.4375,
"rewards/xmlcount_reward_func": 1.234375,
"step": 16
},
{
"completion_length": 230.25,
"epoch": 0.9,
"grad_norm": 8160253.5,
"kl": 155507.37884235661,
"learning_rate": 1.3545689574841341e-08,
"loss": 155.5074,
"reward": 6.18654590845108,
"reward_std": 0.4526741732552182,
"rewards/concensus_correctness_reward_func": 1.835249975323677,
"rewards/consensus_reward_func": 1.75,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.125,
"rewards/question_recreation_reward_func": 0.9319210276007652,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.375,
"rewards/xmlcount_reward_func": 1.1693750023841858,
"step": 18
},
{
"completion_length": 159.875,
"epoch": 1.0,
"grad_norm": 15.891400337219238,
"kl": 0.8860930278897285,
"learning_rate": 0.0,
"loss": 0.0009,
"reward": 5.829563498497009,
"reward_std": 1.0952186286449432,
"rewards/concensus_correctness_reward_func": 1.558749981224537,
"rewards/consensus_reward_func": 1.625,
"rewards/cumulative_reward_2": 0.0,
"rewards/final_correctness_reward_func": 0.0,
"rewards/question_recreation_reward_func": 0.9997510015964508,
"rewards/soft_format_reward_func": 0.0,
"rewards/strict_format_reward_func": 0.4375,
"rewards/xmlcount_reward_func": 1.2085624933242798,
"step": 20
},
{
"epoch": 1.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 4631.326969934987,
"train_runtime": 88.7341,
"train_samples_per_second": 1.803,
"train_steps_per_second": 0.225
}
],
"logging_steps": 2,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}