Model: fuasfh1jjh1/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-fanged_barky_skunk Source: Original Platform
234 lines
8.0 KiB
JSON
234 lines
8.0 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 20,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"completion_length": 228.375,
|
|
"epoch": 0.1,
|
|
"grad_norm": 57.555362701416016,
|
|
"kl": 0.0,
|
|
"learning_rate": 4.965903258506806e-07,
|
|
"loss": -0.0,
|
|
"reward": 4.152295699343085,
|
|
"reward_std": 0.8311166568892077,
|
|
"rewards/concensus_correctness_reward_func": 1.2107499986886978,
|
|
"rewards/consensus_reward_func": 1.0,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.7966707283630967,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.21875,
|
|
"rewards/xmlcount_reward_func": 0.6761250011622906,
|
|
"step": 2
|
|
},
|
|
{
|
|
"completion_length": 201.75,
|
|
"epoch": 0.2,
|
|
"grad_norm": 23.945329666137695,
|
|
"kl": 0.19989765621721745,
|
|
"learning_rate": 4.698684378016222e-07,
|
|
"loss": 0.0002,
|
|
"reward": 7.065919041633606,
|
|
"reward_std": 0.5927391643635929,
|
|
"rewards/concensus_correctness_reward_func": 2.1215000078082085,
|
|
"rewards/consensus_reward_func": 1.875,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.5,
|
|
"rewards/question_recreation_reward_func": 0.990481548011303,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.40625,
|
|
"rewards/xmlcount_reward_func": 1.1726875007152557,
|
|
"step": 4
|
|
},
|
|
{
|
|
"completion_length": 178.375,
|
|
"epoch": 0.3,
|
|
"grad_norm": 2180.853271484375,
|
|
"kl": 5311555.980729777,
|
|
"learning_rate": 4.193203929064353e-07,
|
|
"loss": 5311.5566,
|
|
"reward": 4.848113030195236,
|
|
"reward_std": 2.48440220952034,
|
|
"rewards/concensus_correctness_reward_func": 1.3193749785423279,
|
|
"rewards/consensus_reward_func": 1.375,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.0,
|
|
"rewards/question_recreation_reward_func": 0.7886755615472794,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.3125,
|
|
"rewards/xmlcount_reward_func": 1.0525624975562096,
|
|
"step": 6
|
|
},
|
|
{
|
|
"completion_length": 201.125,
|
|
"epoch": 0.4,
|
|
"grad_norm": 52907.8203125,
|
|
"kl": 1112.07909232378,
|
|
"learning_rate": 3.5042385616324236e-07,
|
|
"loss": 1.1121,
|
|
"reward": 5.95790758728981,
|
|
"reward_std": 1.2487200200557709,
|
|
"rewards/concensus_correctness_reward_func": 1.6839999929070473,
|
|
"rewards/consensus_reward_func": 1.5,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.9379700720310211,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.40625,
|
|
"rewards/xmlcount_reward_func": 1.1796875,
|
|
"step": 8
|
|
},
|
|
{
|
|
"completion_length": 188.875,
|
|
"epoch": 0.5,
|
|
"grad_norm": 2865887744.0,
|
|
"kl": 40746333.48964184,
|
|
"learning_rate": 2.706448363680831e-07,
|
|
"loss": 40746.332,
|
|
"reward": 7.091725826263428,
|
|
"reward_std": 0.6974060980137438,
|
|
"rewards/concensus_correctness_reward_func": 2.4067499935626984,
|
|
"rewards/consensus_reward_func": 2.0,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.9519133418798447,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.34375,
|
|
"rewards/xmlcount_reward_func": 1.1393124982714653,
|
|
"step": 10
|
|
},
|
|
{
|
|
"completion_length": 179.6875,
|
|
"epoch": 0.6,
|
|
"grad_norm": 2026.5494384765625,
|
|
"kl": 44.08751246146858,
|
|
"learning_rate": 1.886286282148002e-07,
|
|
"loss": 0.0441,
|
|
"reward": 6.057031333446503,
|
|
"reward_std": 1.4806374236941338,
|
|
"rewards/concensus_correctness_reward_func": 1.6842499673366547,
|
|
"rewards/consensus_reward_func": 1.625,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.96484375,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.375,
|
|
"rewards/xmlcount_reward_func": 1.1579374969005585,
|
|
"step": 12
|
|
},
|
|
{
|
|
"completion_length": 186.75,
|
|
"epoch": 0.7,
|
|
"grad_norm": 3908203.25,
|
|
"kl": 98680.71119815856,
|
|
"learning_rate": 1.1326296046939333e-07,
|
|
"loss": 98.6807,
|
|
"reward": 6.111200124025345,
|
|
"reward_std": 0.8898124806582928,
|
|
"rewards/concensus_correctness_reward_func": 1.8071250086650252,
|
|
"rewards/consensus_reward_func": 1.75,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.8432626910507679,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.3125,
|
|
"rewards/xmlcount_reward_func": 1.1483124941587448,
|
|
"step": 14
|
|
},
|
|
{
|
|
"completion_length": 174.1875,
|
|
"epoch": 0.8,
|
|
"grad_norm": 751.8195190429688,
|
|
"kl": 35.69426943734288,
|
|
"learning_rate": 5.271487265090163e-08,
|
|
"loss": 0.0357,
|
|
"reward": 6.600282669067383,
|
|
"reward_std": 0.7876708060503006,
|
|
"rewards/concensus_correctness_reward_func": 1.9124999642372131,
|
|
"rewards/consensus_reward_func": 1.875,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.25,
|
|
"rewards/question_recreation_reward_func": 0.8909076675772667,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.4375,
|
|
"rewards/xmlcount_reward_func": 1.234375,
|
|
"step": 16
|
|
},
|
|
{
|
|
"completion_length": 230.25,
|
|
"epoch": 0.9,
|
|
"grad_norm": 8160253.5,
|
|
"kl": 155507.37884235661,
|
|
"learning_rate": 1.3545689574841341e-08,
|
|
"loss": 155.5074,
|
|
"reward": 6.18654590845108,
|
|
"reward_std": 0.4526741732552182,
|
|
"rewards/concensus_correctness_reward_func": 1.835249975323677,
|
|
"rewards/consensus_reward_func": 1.75,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.125,
|
|
"rewards/question_recreation_reward_func": 0.9319210276007652,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.375,
|
|
"rewards/xmlcount_reward_func": 1.1693750023841858,
|
|
"step": 18
|
|
},
|
|
{
|
|
"completion_length": 159.875,
|
|
"epoch": 1.0,
|
|
"grad_norm": 15.891400337219238,
|
|
"kl": 0.8860930278897285,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0009,
|
|
"reward": 5.829563498497009,
|
|
"reward_std": 1.0952186286449432,
|
|
"rewards/concensus_correctness_reward_func": 1.558749981224537,
|
|
"rewards/consensus_reward_func": 1.625,
|
|
"rewards/cumulative_reward_2": 0.0,
|
|
"rewards/final_correctness_reward_func": 0.0,
|
|
"rewards/question_recreation_reward_func": 0.9997510015964508,
|
|
"rewards/soft_format_reward_func": 0.0,
|
|
"rewards/strict_format_reward_func": 0.4375,
|
|
"rewards/xmlcount_reward_func": 1.2085624933242798,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 20,
|
|
"total_flos": 0.0,
|
|
"train_loss": 4631.326969934987,
|
|
"train_runtime": 88.7341,
|
|
"train_samples_per_second": 1.803,
|
|
"train_steps_per_second": 0.225
|
|
}
|
|
],
|
|
"logging_steps": 2,
|
|
"max_steps": 20,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 25,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|