Model: Mahdikppp/Qwen2.5-0.5B-Instruct-Gensyn-Swarm-invisible_ravenous_mongoose Source: Original Platform
444 lines
18 KiB
JSON
444 lines
18 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.975609756097561,
|
|
"eval_steps": 500,
|
|
"global_step": 20,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.0975609756097561,
|
|
"frac_reward_zero_std": 0.75,
|
|
"grad_norm": 0.0,
|
|
"kl": 0.0,
|
|
"learning_rate": 5e-07,
|
|
"loss": -0.0,
|
|
"num_tokens": 2080.0,
|
|
"reward": 2.107478141784668,
|
|
"reward_std": 0.7532989382743835,
|
|
"rewards/concensus_correctness_reward_func/mean": 0.8445000648498535,
|
|
"rewards/concensus_correctness_reward_func/std": 0.5630000233650208,
|
|
"rewards/consensus_reward_func/mean": 1.25,
|
|
"rewards/consensus_reward_func/std": 1.0773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.012978244572877884,
|
|
"rewards/question_recreation_reward_func/std": 0.004188001621514559,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 2
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 3.5,
|
|
"completions/mean_terminated_length": 3.5,
|
|
"completions/min_length": 3.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.1951219512195122,
|
|
"frac_reward_zero_std": 0.75,
|
|
"grad_norm": 3.73433053368899e-08,
|
|
"kl": 0.000426415354013443,
|
|
"learning_rate": 4.864543104251586e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 4156.0,
|
|
"reward": 1.6496462225914001,
|
|
"reward_std": 0.5526039600372314,
|
|
"rewards/concensus_correctness_reward_func/mean": 1.140749990940094,
|
|
"rewards/concensus_correctness_reward_func/std": 1.647525429725647,
|
|
"rewards/consensus_reward_func/mean": 0.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.008896212559193373,
|
|
"rewards/question_recreation_reward_func/std": 0.0068460507318377495,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 4
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 3.5,
|
|
"completions/mean_terminated_length": 3.5,
|
|
"completions/min_length": 3.0,
|
|
"completions/min_terminated_length": 3.0,
|
|
"epoch": 0.2926829268292683,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 6.071777391980504e-08,
|
|
"kl": 2.3096799850463867e-06,
|
|
"learning_rate": 4.472851273490984e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 6232.0,
|
|
"reward": 1.2626911401748657,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 0.7299999892711639,
|
|
"rewards/concensus_correctness_reward_func/std": 0.8429313898086548,
|
|
"rewards/consensus_reward_func/mean": 0.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.03269112668931484,
|
|
"rewards/question_recreation_reward_func/std": 0.0033300668001174927,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 6
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 42.0,
|
|
"completions/max_terminated_length": 42.0,
|
|
"completions/mean_length": 20.5,
|
|
"completions/mean_terminated_length": 20.5,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.3902439024390244,
|
|
"frac_reward_zero_std": 0.75,
|
|
"grad_norm": 0.0034936980810016394,
|
|
"kl": 0.09646054328186437,
|
|
"learning_rate": 3.867370395306068e-07,
|
|
"loss": 0.0001,
|
|
"num_tokens": 8444.0,
|
|
"reward": 4.392500638961792,
|
|
"reward_std": 0.011391956359148026,
|
|
"rewards/concensus_correctness_reward_func/mean": 2.6324999928474426,
|
|
"rewards/concensus_correctness_reward_func/std": 0.7765360977500677,
|
|
"rewards/consensus_reward_func/mean": 1.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.011250502662733197,
|
|
"rewards/question_recreation_reward_func/std": 0.00992788770236075,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.2487500011920929,
|
|
"rewards/xmlcount_reward_func/std": 0.28787195682525635,
|
|
"step": 8
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.4878048780487805,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 3.545432392115799e-08,
|
|
"kl": 0.010027587413787842,
|
|
"learning_rate": 3.1137137178519977e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 10524.0,
|
|
"reward": 2.497630000114441,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 0.9599999785423279,
|
|
"rewards/concensus_correctness_reward_func/std": 0.0,
|
|
"rewards/consensus_reward_func/mean": 1.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.03762998804450035,
|
|
"rewards/question_recreation_reward_func/std": 0.0033961788285523653,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 10
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.5853658536585366,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 5.641782152565611e-09,
|
|
"kl": 0.04576372355222702,
|
|
"learning_rate": 2.2935516363191693e-07,
|
|
"loss": 0.0,
|
|
"num_tokens": 12604.0,
|
|
"reward": 4.48307740688324,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 2.940999984741211,
|
|
"rewards/concensus_correctness_reward_func/std": 1.1304517984390259,
|
|
"rewards/consensus_reward_func/mean": 1.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.04207756486721337,
|
|
"rewards/question_recreation_reward_func/std": 0.0,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 12
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.5,
|
|
"completions/max_terminated_length": 4.5,
|
|
"completions/mean_length": 4.125,
|
|
"completions/mean_terminated_length": 4.125,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.6829268292682927,
|
|
"frac_reward_zero_std": 0.75,
|
|
"grad_norm": 223.39295959472656,
|
|
"kl": 0.20429373532533646,
|
|
"learning_rate": 1.4957614383675767e-07,
|
|
"loss": 0.0002,
|
|
"num_tokens": 14685.0,
|
|
"reward": 1.2401338815689087,
|
|
"reward_std": 0.34923410415649414,
|
|
"rewards/concensus_correctness_reward_func/mean": 0.4790000021457672,
|
|
"rewards/concensus_correctness_reward_func/std": 0.5531015992164612,
|
|
"rewards/consensus_reward_func/mean": 0.75,
|
|
"rewards/consensus_reward_func/std": 1.0773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.011133914813399315,
|
|
"rewards/question_recreation_reward_func/std": 0.005399489309638739,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 14
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.7804878048780488,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 0.00020408314594533294,
|
|
"kl": 0.20127877593040466,
|
|
"learning_rate": 8.067960709356478e-08,
|
|
"loss": 0.0002,
|
|
"num_tokens": 16765.0,
|
|
"reward": 3.500740647315979,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 1.9600000381469727,
|
|
"rewards/concensus_correctness_reward_func/std": 0.0,
|
|
"rewards/consensus_reward_func/mean": 1.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.04074074048548937,
|
|
"rewards/question_recreation_reward_func/std": 0.0,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 16
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.8780487804878049,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 7.515780886535595e-09,
|
|
"kl": 0.0,
|
|
"learning_rate": 3.013156219837776e-08,
|
|
"loss": 0.0,
|
|
"num_tokens": 18845.0,
|
|
"reward": 3.4462019205093384,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 1.9130000472068787,
|
|
"rewards/concensus_correctness_reward_func/std": 0.0519615113735199,
|
|
"rewards/consensus_reward_func/mean": 1.5,
|
|
"rewards/consensus_reward_func/std": 0.5773502588272095,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.03320199949666858,
|
|
"rewards/question_recreation_reward_func/std": 0.0,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 18
|
|
},
|
|
{
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 4.0,
|
|
"completions/max_terminated_length": 4.0,
|
|
"completions/mean_length": 4.0,
|
|
"completions/mean_terminated_length": 4.0,
|
|
"completions/min_length": 4.0,
|
|
"completions/min_terminated_length": 4.0,
|
|
"epoch": 0.975609756097561,
|
|
"frac_reward_zero_std": 1.0,
|
|
"grad_norm": 2.6966833210906316e-09,
|
|
"kl": 0.32504162192344666,
|
|
"learning_rate": 3.4096741493194193e-09,
|
|
"loss": 0.0003,
|
|
"num_tokens": 20925.0,
|
|
"reward": 4.6496992111206055,
|
|
"reward_std": 0.0,
|
|
"rewards/concensus_correctness_reward_func/mean": 3.6059999465942383,
|
|
"rewards/concensus_correctness_reward_func/std": 0.0,
|
|
"rewards/consensus_reward_func/mean": 1.0,
|
|
"rewards/consensus_reward_func/std": 0.0,
|
|
"rewards/cumulative_reward_2/mean": 0.0,
|
|
"rewards/cumulative_reward_2/std": 0.0,
|
|
"rewards/final_correctness_reward_func/mean": 0.0,
|
|
"rewards/final_correctness_reward_func/std": 0.0,
|
|
"rewards/question_recreation_reward_func/mean": 0.0436993483453989,
|
|
"rewards/question_recreation_reward_func/std": 0.0,
|
|
"rewards/soft_format_reward_func/mean": 0.0,
|
|
"rewards/soft_format_reward_func/std": 0.0,
|
|
"rewards/strict_format_reward_func/mean": 0.0,
|
|
"rewards/strict_format_reward_func/std": 0.0,
|
|
"rewards/xmlcount_reward_func/mean": 0.0,
|
|
"rewards/xmlcount_reward_func/std": 0.0,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.975609756097561,
|
|
"step": 20,
|
|
"total_flos": 0.0,
|
|
"train_loss": 8.8329258843034e-05,
|
|
"train_runtime": 703.5963,
|
|
"train_samples_per_second": 0.114,
|
|
"train_steps_per_second": 0.028
|
|
}
|
|
],
|
|
"logging_steps": 2,
|
|
"max_steps": 20,
|
|
"num_input_tokens_seen": 20925,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 25,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|