{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.8, "eval_steps": 500, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 27.0, "completions/max_terminated_length": 27.0, "completions/mean_length": 23.5, "completions/mean_terminated_length": 23.5, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 0.4, "frac_reward_zero_std": 0.0, "grad_norm": 45.845420837402344, "kl": 0.0, "learning_rate": 0.0, "loss": 0.0, "num_tokens": 606.0, "reward": 0.27725791931152344, "reward_std": 0.0050897058099508286, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.184007927775383, "rewards/question_recreation_reward_func/std": 0.007569987326860428, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.09325000643730164, "rewards/xmlcount_reward_func/std": 0.10767660290002823, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 27.0, "completions/max_terminated_length": 27.0, "completions/mean_length": 24.0, "completions/mean_terminated_length": 24.0, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.8, "frac_reward_zero_std": 0.5, "grad_norm": 9.568375587463379, "kl": 0.0, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 1214.0, "reward": 0.05984270200133324, "reward_std": 5.343298471416347e-05, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.05984270200133324, "rewards/question_recreation_reward_func/std": 0.0004468102415557951, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 23.0, "completions/max_terminated_length": 23.0, "completions/mean_length": 21.5, "completions/mean_terminated_length": 21.5, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 1.4, "frac_reward_zero_std": 0.5, "grad_norm": 39.694496154785156, "kl": 0.1073869839310646, "learning_rate": 9.931806517013612e-07, "loss": 0.0043, "num_tokens": 1812.0, "reward": 0.20595212280750275, "reward_std": 0.0009102191543206573, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.020702123641967773, "rewards/question_recreation_reward_func/std": 0.003408734453842044, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.18525001406669617, "rewards/xmlcount_reward_func/std": 0.002061557024717331, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/max_terminated_length": 221.0, "completions/mean_length": 82.25, "completions/mean_terminated_length": 82.25, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "epoch": 1.8, "frac_reward_zero_std": 0.5, "grad_norm": 16.176429748535156, "kl": 0.020354424137622118, "learning_rate": 9.729086208503173e-07, "loss": 0.0798, "num_tokens": 2653.0, "reward": 0.04737311601638794, "reward_std": 0.010779645293951035, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.04737311601638794, "rewards/question_recreation_reward_func/std": 0.019436875358223915, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 45.0, "completions/max_terminated_length": 45.0, "completions/mean_length": 28.25, "completions/mean_terminated_length": 28.25, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 2.4, "frac_reward_zero_std": 0.0, "grad_norm": 44.07194900512695, "kl": 0.08810030668973923, "learning_rate": 9.397368756032444e-07, "loss": 0.0939, "num_tokens": 3278.0, "reward": 0.050381630659103394, "reward_std": 0.006566238589584827, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.0503816232085228, "rewards/question_recreation_reward_func/std": 0.007008333690464497, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 27.0, "completions/max_terminated_length": 27.0, "completions/mean_length": 25.0, "completions/mean_terminated_length": 25.0, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 2.8, "frac_reward_zero_std": 1.0, "grad_norm": 1.3027467727661133, "kl": 0.08069050312042236, "learning_rate": 8.945702546981968e-07, "loss": 0.0032, "num_tokens": 3890.0, "reward": 0.1409657597541809, "reward_std": 0.0, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.04946577176451683, "rewards/question_recreation_reward_func/std": 0.012337522581219673, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.09149999916553497, "rewards/xmlcount_reward_func/std": 0.10565510392189026, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/max_terminated_length": 157.0, "completions/mean_length": 67.0, "completions/mean_terminated_length": 67.0, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 3.4, "frac_reward_zero_std": 0.5, "grad_norm": 18.420305252075195, "kl": 0.03622829727828503, "learning_rate": 8.386407858128706e-07, "loss": -0.0354, "num_tokens": 4670.0, "reward": 0.044995490461587906, "reward_std": 0.0019922610372304916, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.044995490461587906, "rewards/question_recreation_reward_func/std": 0.014919609762728214, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/max_terminated_length": 146.0, "completions/mean_length": 54.75, "completions/mean_terminated_length": 54.75, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "epoch": 3.8, "frac_reward_zero_std": 0.5, "grad_norm": 11.040939331054688, "kl": 0.025315589271485806, "learning_rate": 7.734740790612136e-07, "loss": 0.243, "num_tokens": 5401.0, "reward": 0.27007412910461426, "reward_std": 0.010380705818533897, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.1785741150379181, "rewards/question_recreation_reward_func/std": 0.012178990058600903, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.09149999916553497, "rewards/xmlcount_reward_func/std": 0.10565510392189026, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 20.0, "completions/max_terminated_length": 20.0, "completions/mean_length": 17.0, "completions/mean_terminated_length": 17.0, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 4.4, "frac_reward_zero_std": 1.0, "grad_norm": 0.005074405111372471, "kl": 0.15366993844509125, "learning_rate": 7.008477123264847e-07, "loss": 0.0061, "num_tokens": 5981.0, "reward": 0.05733615159988403, "reward_std": 0.0, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.05733615159988403, "rewards/question_recreation_reward_func/std": 0.0004914185265079141, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/max_terminated_length": 389.0, "completions/mean_length": 259.5, "completions/mean_terminated_length": 259.5, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.8, "frac_reward_zero_std": 0.0, "grad_norm": 12.459712028503418, "kl": 0.029772183392196894, "learning_rate": 6.227427435703995e-07, "loss": 0.2349, "num_tokens": 7531.0, "reward": 0.11735042929649353, "reward_std": 0.018421867862343788, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.11735042929649353, "rewards/question_recreation_reward_func/std": 0.01842377707362175, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 23.0, "completions/max_terminated_length": 23.0, "completions/mean_length": 18.5, "completions/mean_terminated_length": 18.5, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 5.4, "frac_reward_zero_std": 1.0, "grad_norm": 0.039047498255968094, "kl": 0.11775290966033936, "learning_rate": 5.412896727361662e-07, "loss": 0.0047, "num_tokens": 8117.0, "reward": 0.40121549367904663, "reward_std": 0.0, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.30971550941467285, "rewards/question_recreation_reward_func/std": 0.044006314128637314, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.09149999916553497, "rewards/xmlcount_reward_func/std": 0.10565510392189026, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/max_terminated_length": 143.0, "completions/mean_length": 61.75, "completions/mean_terminated_length": 61.75, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 5.8, "frac_reward_zero_std": 0.5, "grad_norm": 24.296092987060547, "kl": 0.0357817467302084, "learning_rate": 4.5871032726383385e-07, "loss": 0.0419, "num_tokens": 8876.0, "reward": 0.048355475068092346, "reward_std": 0.007202191278338432, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.048355475068092346, "rewards/question_recreation_reward_func/std": 0.020798446610569954, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/max_terminated_length": 149.0, "completions/mean_length": 64.25, "completions/mean_terminated_length": 64.25, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 6.4, "frac_reward_zero_std": 0.5, "grad_norm": 20.938844680786133, "kl": 0.061262642964720726, "learning_rate": 3.772572564296004e-07, "loss": 0.0246, "num_tokens": 9645.0, "reward": 0.19921739399433136, "reward_std": 0.01111482735723257, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.19921737909317017, "rewards/question_recreation_reward_func/std": 0.08456551283597946, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.0, "rewards/xmlcount_reward_func/std": 0.0, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 23.0, "completions/max_terminated_length": 23.0, "completions/mean_length": 21.0, "completions/mean_terminated_length": 21.0, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 6.8, "frac_reward_zero_std": 0.5, "grad_norm": 12.05763053894043, "kl": 0.054638177156448364, "learning_rate": 2.9915228767351535e-07, "loss": 0.0173, "num_tokens": 10241.0, "reward": 0.14335308969020844, "reward_std": 0.0008261414477601647, "rewards/concensus_correctness_reward_func/mean": 0.0, "rewards/concensus_correctness_reward_func/std": 0.0, "rewards/consensus_reward_func/mean": 0.0, "rewards/consensus_reward_func/std": 0.0, "rewards/cumulative_reward_2/mean": 0.0, "rewards/cumulative_reward_2/std": 0.0, "rewards/final_correctness_reward_func/mean": 0.0, "rewards/final_correctness_reward_func/std": 0.0, "rewards/question_recreation_reward_func/mean": 0.0513530895113945, "rewards/question_recreation_reward_func/std": 0.007401337847113609, "rewards/soft_format_reward_func/mean": 0.0, "rewards/soft_format_reward_func/std": 0.0, "rewards/strict_format_reward_func/mean": 0.0, "rewards/strict_format_reward_func/std": 0.0, "rewards/xmlcount_reward_func/mean": 0.09200000017881393, "rewards/xmlcount_reward_func/std": 0.10623559355735779, "step": 14 }, { "epoch": 6.8, "step": 14, "total_flos": 0.0, "train_loss": 0.05132548208348453, "train_runtime": 571.6229, "train_samples_per_second": 0.14, "train_steps_per_second": 0.035 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 10241, "num_train_epochs": 7, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }