{ "best_global_step": 21, "best_metric": 0.6508771929824562, "best_model_checkpoint": "/root/ARC-Easy_Llama-3.2-1B/no_sweep/oqrx1b71_lunar-oath-85/checkpoint-21", "epoch": 21.0, "eval_steps": 500, "global_step": 21, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accumulated_loss": 875.1691761016846, "eval_accuracy": 0.3017543859649123, "eval_accuracy_32": 0.9746835443037974, "eval_accuracy_33": 0.0, "eval_accuracy_34": 0.11971830985915492, "eval_accuracy_35": 0.00847457627118644, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 170.0, "eval_correct_gen_preds_32": 154.0, "eval_correct_gen_preds_33": 0.0, "eval_correct_gen_preds_34": 15.0, "eval_correct_gen_preds_35": 1.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 172.0, "eval_correct_preds_32": 154.0, "eval_correct_preds_33": 0.0, "eval_correct_preds_34": 17.0, "eval_correct_preds_35": 1.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.2982456140350877, "eval_gen_accuracy_32": 0.9746835443037974, "eval_gen_accuracy_33": 0.0, "eval_gen_accuracy_34": 0.1056338028169014, "eval_gen_accuracy_35": 0.00847457627118644, "eval_gen_accuracy_36": 0.0, "eval_loss": 1.5353845357894897, "eval_mdl": 1262.6022303007803, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.0141, "eval_samples_per_second": 283.004, "eval_steps_per_second": 2.979, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 0 }, { "epoch": 1.0, "grad_norm": 117.0, "learning_rate": 0.0, "loss": 1.4592, "step": 1 }, { "epoch": 1.0, "eval_accumulated_loss": 875.1691761016846, "eval_accuracy": 0.3017543859649123, "eval_accuracy_32": 0.9746835443037974, "eval_accuracy_33": 0.0, "eval_accuracy_34": 0.11971830985915492, "eval_accuracy_35": 0.00847457627118644, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 170.0, "eval_correct_gen_preds_32": 154.0, "eval_correct_gen_preds_33": 0.0, "eval_correct_gen_preds_34": 15.0, "eval_correct_gen_preds_35": 1.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 172.0, "eval_correct_preds_32": 154.0, "eval_correct_preds_33": 0.0, "eval_correct_preds_34": 17.0, "eval_correct_preds_35": 1.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.2982456140350877, "eval_gen_accuracy_32": 0.9746835443037974, "eval_gen_accuracy_33": 0.0, "eval_gen_accuracy_34": 0.1056338028169014, "eval_gen_accuracy_35": 0.00847457627118644, "eval_gen_accuracy_36": 0.0, "eval_loss": 1.5353845357894897, "eval_mdl": 1262.6022303007803, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.9316, "eval_samples_per_second": 295.093, "eval_steps_per_second": 3.106, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 1 }, { "epoch": 2.0, "grad_norm": 116.0, "learning_rate": 2e-05, "loss": 1.4587, "step": 2 }, { "epoch": 2.0, "eval_accumulated_loss": 1628.622148513794, "eval_accuracy": 0.3298245614035088, "eval_accuracy_32": 0.0, "eval_accuracy_33": 0.3026315789473684, "eval_accuracy_34": 0.9929577464788732, "eval_accuracy_35": 0.00847457627118644, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 188.0, "eval_correct_gen_preds_32": 0.0, "eval_correct_gen_preds_33": 46.0, "eval_correct_gen_preds_34": 141.0, "eval_correct_gen_preds_35": 1.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 188.0, "eval_correct_preds_32": 0.0, "eval_correct_preds_33": 46.0, "eval_correct_preds_34": 141.0, "eval_correct_preds_35": 1.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.3298245614035088, "eval_gen_accuracy_32": 0.0, "eval_gen_accuracy_33": 0.3026315789473684, "eval_gen_accuracy_34": 0.9929577464788732, "eval_gen_accuracy_35": 0.00847457627118644, "eval_gen_accuracy_36": 0.0, "eval_loss": 2.857232093811035, "eval_mdl": 2349.6050971427794, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.9821, "eval_samples_per_second": 287.57, "eval_steps_per_second": 3.027, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 2 }, { "epoch": 3.0, "grad_norm": 125.0, "learning_rate": 1.9994965423831853e-05, "loss": 1.7615, "step": 3 }, { "epoch": 3.0, "eval_accumulated_loss": 863.5332918167114, "eval_accuracy": 0.30350877192982456, "eval_accuracy_32": 0.0, "eval_accuracy_33": 0.993421052631579, "eval_accuracy_34": 0.056338028169014086, "eval_accuracy_35": 0.11864406779661017, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 173.0, "eval_correct_gen_preds_32": 0.0, "eval_correct_gen_preds_33": 151.0, "eval_correct_gen_preds_34": 8.0, "eval_correct_gen_preds_35": 14.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 173.0, "eval_correct_preds_32": 0.0, "eval_correct_preds_33": 151.0, "eval_correct_preds_34": 8.0, "eval_correct_preds_35": 14.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.30350877192982456, "eval_gen_accuracy_32": 0.0, "eval_gen_accuracy_33": 0.993421052631579, "eval_gen_accuracy_34": 0.056338028169014086, "eval_gen_accuracy_35": 0.11864406779661017, "eval_gen_accuracy_36": 0.0, "eval_loss": 1.5149707794189453, "eval_mdl": 1245.8151977464918, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.8611, "eval_samples_per_second": 306.269, "eval_steps_per_second": 3.224, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 3 }, { "epoch": 4.0, "grad_norm": 85.0, "learning_rate": 1.9979866764718846e-05, "loss": 0.683, "step": 4 }, { "epoch": 4.0, "eval_accumulated_loss": 786.9393863677979, "eval_accuracy": 0.5385964912280702, "eval_accuracy_32": 0.37341772151898733, "eval_accuracy_33": 0.8289473684210527, "eval_accuracy_34": 0.5915492957746479, "eval_accuracy_35": 0.3220338983050847, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 254.0, "eval_correct_gen_preds_32": 39.0, "eval_correct_gen_preds_33": 107.0, "eval_correct_gen_preds_34": 76.0, "eval_correct_gen_preds_35": 32.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 307.0, "eval_correct_preds_32": 59.0, "eval_correct_preds_33": 126.0, "eval_correct_preds_34": 84.0, "eval_correct_preds_35": 38.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.4456140350877193, "eval_gen_accuracy_32": 0.2468354430379747, "eval_gen_accuracy_33": 0.7039473684210527, "eval_gen_accuracy_34": 0.5352112676056338, "eval_gen_accuracy_35": 0.2711864406779661, "eval_gen_accuracy_36": 0.0, "eval_loss": 1.3805955648422241, "eval_mdl": 1135.313550193026, "eval_model_preparation_time": 0.0069, "eval_runtime": 3.3581, "eval_samples_per_second": 169.741, "eval_steps_per_second": 1.787, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 4 }, { "epoch": 5.0, "grad_norm": 14.125, "learning_rate": 1.9954719225730847e-05, "loss": 0.0591, "step": 5 }, { "epoch": 5.0, "eval_accumulated_loss": 1043.7736988067627, "eval_accuracy": 0.624561403508772, "eval_accuracy_32": 0.620253164556962, "eval_accuracy_33": 0.756578947368421, "eval_accuracy_34": 0.647887323943662, "eval_accuracy_35": 0.4322033898305085, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 262.0, "eval_correct_gen_preds_32": 53.0, "eval_correct_gen_preds_33": 98.0, "eval_correct_gen_preds_34": 69.0, "eval_correct_gen_preds_35": 42.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 356.0, "eval_correct_preds_32": 98.0, "eval_correct_preds_33": 115.0, "eval_correct_preds_34": 92.0, "eval_correct_preds_35": 51.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.45964912280701753, "eval_gen_accuracy_32": 0.33544303797468356, "eval_gen_accuracy_33": 0.6447368421052632, "eval_gen_accuracy_34": 0.4859154929577465, "eval_gen_accuracy_35": 0.3559322033898305, "eval_gen_accuracy_36": 0.0, "eval_loss": 1.8311821222305298, "eval_mdl": 1505.847139078847, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.0813, "eval_samples_per_second": 273.865, "eval_steps_per_second": 2.883, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 5 }, { "epoch": 6.0, "grad_norm": 0.10400390625, "learning_rate": 1.9919548128307954e-05, "loss": 0.0003, "step": 6 }, { "epoch": 6.0, "eval_accumulated_loss": 1324.264461517334, "eval_accuracy": 0.6192982456140351, "eval_accuracy_32": 0.6139240506329114, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6338028169014085, "eval_accuracy_35": 0.4491525423728814, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 288.0, "eval_correct_gen_preds_32": 64.0, "eval_correct_gen_preds_33": 103.0, "eval_correct_gen_preds_34": 75.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 353.0, "eval_correct_preds_32": 97.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 90.0, "eval_correct_preds_35": 53.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5052631578947369, "eval_gen_accuracy_32": 0.4050632911392405, "eval_gen_accuracy_33": 0.6776315789473685, "eval_gen_accuracy_34": 0.528169014084507, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 2.323270797729492, "eval_mdl": 1910.5097714565513, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.2265, "eval_samples_per_second": 256.011, "eval_steps_per_second": 2.695, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 6 }, { "epoch": 7.0, "grad_norm": 0.0145263671875, "learning_rate": 1.9874388886763944e-05, "loss": 0.0, "step": 7 }, { "epoch": 7.0, "eval_accumulated_loss": 1518.1329746246338, "eval_accuracy": 0.6421052631578947, "eval_accuracy_32": 0.6392405063291139, "eval_accuracy_33": 0.756578947368421, "eval_accuracy_34": 0.6690140845070423, "eval_accuracy_35": 0.4661016949152542, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 306.0, "eval_correct_gen_preds_32": 66.0, "eval_correct_gen_preds_33": 108.0, "eval_correct_gen_preds_34": 81.0, "eval_correct_gen_preds_35": 51.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 366.0, "eval_correct_preds_32": 101.0, "eval_correct_preds_33": 115.0, "eval_correct_preds_34": 95.0, "eval_correct_preds_35": 55.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5368421052631579, "eval_gen_accuracy_32": 0.4177215189873418, "eval_gen_accuracy_33": 0.7105263157894737, "eval_gen_accuracy_34": 0.5704225352112676, "eval_gen_accuracy_35": 0.4322033898305085, "eval_gen_accuracy_36": 0.0, "eval_loss": 2.663391351699829, "eval_mdl": 2190.2029139009696, "eval_model_preparation_time": 0.0069, "eval_runtime": 3.9086, "eval_samples_per_second": 145.834, "eval_steps_per_second": 1.535, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 7 }, { "epoch": 8.0, "grad_norm": 0.006195068359375, "learning_rate": 1.9819286972627066e-05, "loss": 0.0, "step": 8 }, { "epoch": 8.0, "eval_accumulated_loss": 1669.1528663635254, "eval_accuracy": 0.6403508771929824, "eval_accuracy_32": 0.6392405063291139, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.676056338028169, "eval_accuracy_35": 0.4661016949152542, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 313.0, "eval_correct_gen_preds_32": 67.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 87.0, "eval_correct_gen_preds_35": 50.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 365.0, "eval_correct_preds_32": 101.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 96.0, "eval_correct_preds_35": 55.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5491228070175439, "eval_gen_accuracy_32": 0.4240506329113924, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6126760563380281, "eval_gen_accuracy_35": 0.423728813559322, "eval_gen_accuracy_36": 0.0, "eval_loss": 2.9283385276794434, "eval_mdl": 2408.078562788257, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.0392, "eval_samples_per_second": 279.525, "eval_steps_per_second": 2.942, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 8 }, { "epoch": 9.0, "grad_norm": 0.0032501220703125, "learning_rate": 1.9754297868854075e-05, "loss": 0.0, "step": 9 }, { "epoch": 9.0, "eval_accumulated_loss": 1789.4745388031006, "eval_accuracy": 0.6491228070175439, "eval_accuracy_32": 0.6582278481012658, "eval_accuracy_33": 0.756578947368421, "eval_accuracy_34": 0.6830985915492958, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 318.0, "eval_correct_gen_preds_32": 70.0, "eval_correct_gen_preds_33": 110.0, "eval_correct_gen_preds_34": 88.0, "eval_correct_gen_preds_35": 50.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 370.0, "eval_correct_preds_32": 104.0, "eval_correct_preds_33": 115.0, "eval_correct_preds_34": 97.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5578947368421052, "eval_gen_accuracy_32": 0.4430379746835443, "eval_gen_accuracy_33": 0.7236842105263158, "eval_gen_accuracy_34": 0.6197183098591549, "eval_gen_accuracy_35": 0.423728813559322, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.1394293308258057, "eval_mdl": 2581.6660429282983, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.7388, "eval_samples_per_second": 327.807, "eval_steps_per_second": 3.451, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 9 }, { "epoch": 10.0, "grad_norm": 0.0020751953125, "learning_rate": 1.9679487013963566e-05, "loss": 0.0, "step": 10 }, { "epoch": 10.0, "eval_accumulated_loss": 1878.2606582641602, "eval_accuracy": 0.6456140350877193, "eval_accuracy_32": 0.6392405063291139, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4661016949152542, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 314.0, "eval_correct_gen_preds_32": 73.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 86.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 368.0, "eval_correct_preds_32": 101.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 55.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5508771929824562, "eval_gen_accuracy_32": 0.4620253164556962, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6056338028169014, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.295194387435913, "eval_mdl": 2709.757337174544, "eval_model_preparation_time": 0.0069, "eval_runtime": 4.1173, "eval_samples_per_second": 138.44, "eval_steps_per_second": 1.457, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 10 }, { "epoch": 11.0, "grad_norm": 0.001251220703125, "learning_rate": 1.9594929736144978e-05, "loss": 0.0, "step": 11 }, { "epoch": 11.0, "eval_accumulated_loss": 1943.7979278564453, "eval_accuracy": 0.6421052631578947, "eval_accuracy_32": 0.6329113924050633, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 318.0, "eval_correct_gen_preds_32": 74.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 89.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 366.0, "eval_correct_preds_32": 100.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5578947368421052, "eval_gen_accuracy_32": 0.46835443037974683, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6267605633802817, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.4101719856262207, "eval_mdl": 2804.3076310087367, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.459, "eval_samples_per_second": 231.805, "eval_steps_per_second": 2.44, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 11 }, { "epoch": 12.0, "grad_norm": 0.0009918212890625, "learning_rate": 1.9500711177409456e-05, "loss": 0.0, "step": 12 }, { "epoch": 12.0, "eval_accumulated_loss": 1991.1740913391113, "eval_accuracy": 0.6421052631578947, "eval_accuracy_32": 0.6329113924050633, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 320.0, "eval_correct_gen_preds_32": 74.0, "eval_correct_gen_preds_33": 110.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 366.0, "eval_correct_preds_32": 100.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5614035087719298, "eval_gen_accuracy_32": 0.46835443037974683, "eval_gen_accuracy_33": 0.7236842105263158, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.493288040161133, "eval_mdl": 2872.656987121524, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.9289, "eval_samples_per_second": 295.499, "eval_steps_per_second": 3.111, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 12 }, { "epoch": 13.0, "grad_norm": 0.0007476806640625, "learning_rate": 1.9396926207859085e-05, "loss": 0.0, "step": 13 }, { "epoch": 13.0, "eval_accumulated_loss": 2030.5798206329346, "eval_accuracy": 0.6385964912280702, "eval_accuracy_32": 0.6329113924050633, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6830985915492958, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 318.0, "eval_correct_gen_preds_32": 75.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 88.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 364.0, "eval_correct_preds_32": 100.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 97.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5578947368421052, "eval_gen_accuracy_32": 0.47468354430379744, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6197183098591549, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.5624210834503174, "eval_mdl": 2929.5074373563357, "eval_model_preparation_time": 0.0069, "eval_runtime": 3.2943, "eval_samples_per_second": 173.028, "eval_steps_per_second": 1.821, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 13 }, { "epoch": 14.0, "grad_norm": 0.00057220458984375, "learning_rate": 1.9283679330160726e-05, "loss": 0.0, "step": 14 }, { "epoch": 14.0, "eval_accumulated_loss": 2057.405590057373, "eval_accuracy": 0.6403508771929824, "eval_accuracy_32": 0.6329113924050633, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6830985915492958, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 318.0, "eval_correct_gen_preds_32": 71.0, "eval_correct_gen_preds_33": 111.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 365.0, "eval_correct_preds_32": 100.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 97.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5578947368421052, "eval_gen_accuracy_32": 0.44936708860759494, "eval_gen_accuracy_33": 0.7302631578947368, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.6094837188720703, "eval_mdl": 2968.2088418730036, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.0027, "eval_samples_per_second": 284.614, "eval_steps_per_second": 2.996, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 14 }, { "epoch": 15.0, "grad_norm": 0.000545501708984375, "learning_rate": 1.9161084574320696e-05, "loss": 0.0, "step": 15 }, { "epoch": 15.0, "eval_accumulated_loss": 2082.8866996765137, "eval_accuracy": 0.6456140350877193, "eval_accuracy_32": 0.6455696202531646, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4661016949152542, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 319.0, "eval_correct_gen_preds_32": 73.0, "eval_correct_gen_preds_33": 110.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 368.0, "eval_correct_preds_32": 102.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 55.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5596491228070175, "eval_gen_accuracy_32": 0.4620253164556962, "eval_gen_accuracy_33": 0.7236842105263158, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.6541876792907715, "eval_mdl": 3004.970312356886, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.6596, "eval_samples_per_second": 214.32, "eval_steps_per_second": 2.256, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 15 }, { "epoch": 16.0, "grad_norm": 0.0004711151123046875, "learning_rate": 1.9029265382866216e-05, "loss": 0.0, "step": 16 }, { "epoch": 16.0, "eval_accumulated_loss": 2088.4699153900146, "eval_accuracy": 0.643859649122807, "eval_accuracy_32": 0.6392405063291139, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 321.0, "eval_correct_gen_preds_32": 73.0, "eval_correct_gen_preds_33": 111.0, "eval_correct_gen_preds_34": 91.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 367.0, "eval_correct_preds_32": 101.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5631578947368421, "eval_gen_accuracy_32": 0.4620253164556962, "eval_gen_accuracy_33": 0.7302631578947368, "eval_gen_accuracy_34": 0.6408450704225352, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.66398286819458, "eval_mdl": 3013.0251899789673, "eval_model_preparation_time": 0.0069, "eval_runtime": 4.8066, "eval_samples_per_second": 118.587, "eval_steps_per_second": 1.248, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 16 }, { "epoch": 17.0, "grad_norm": 0.00041961669921875, "learning_rate": 1.8888354486549238e-05, "loss": 0.0, "step": 17 }, { "epoch": 17.0, "eval_accumulated_loss": 2108.461343765259, "eval_accuracy": 0.6473684210526316, "eval_accuracy_32": 0.6518987341772152, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4661016949152542, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 321.0, "eval_correct_gen_preds_32": 76.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 369.0, "eval_correct_preds_32": 103.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 55.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5631578947368421, "eval_gen_accuracy_32": 0.4810126582278481, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.6990551948547363, "eval_mdl": 3041.8667245562187, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.3278, "eval_samples_per_second": 244.866, "eval_steps_per_second": 2.578, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 17 }, { "epoch": 18.0, "grad_norm": 0.00040435791015625, "learning_rate": 1.873849377069785e-05, "loss": 0.0, "step": 18 }, { "epoch": 18.0, "eval_accumulated_loss": 2120.72190284729, "eval_accuracy": 0.6491228070175439, "eval_accuracy_32": 0.6582278481012658, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6830985915492958, "eval_accuracy_35": 0.4745762711864407, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 321.0, "eval_correct_gen_preds_32": 77.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 89.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 370.0, "eval_correct_preds_32": 104.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 97.0, "eval_correct_preds_35": 56.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5631578947368421, "eval_gen_accuracy_32": 0.4873417721518987, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6267605633802817, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.7205653190612793, "eval_mdl": 3059.5549723423915, "eval_model_preparation_time": 0.0069, "eval_runtime": 5.5641, "eval_samples_per_second": 102.443, "eval_steps_per_second": 1.078, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 18 }, { "epoch": 19.0, "grad_norm": 0.0003910064697265625, "learning_rate": 1.8579834132349773e-05, "loss": 0.0, "step": 19 }, { "epoch": 19.0, "eval_accumulated_loss": 2125.0127182006836, "eval_accuracy": 0.6456140350877193, "eval_accuracy_32": 0.6455696202531646, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6901408450704225, "eval_accuracy_35": 0.4576271186440678, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 320.0, "eval_correct_gen_preds_32": 74.0, "eval_correct_gen_preds_33": 110.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 368.0, "eval_correct_preds_32": 102.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 98.0, "eval_correct_preds_35": 54.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5614035087719298, "eval_gen_accuracy_32": 0.46835443037974683, "eval_gen_accuracy_33": 0.7236842105263158, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.728092908859253, "eval_mdl": 3065.7453103741027, "eval_model_preparation_time": 0.0069, "eval_runtime": 1.8394, "eval_samples_per_second": 309.89, "eval_steps_per_second": 3.262, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 19 }, { "epoch": 20.0, "grad_norm": 0.0003662109375, "learning_rate": 1.8412535328311813e-05, "loss": 0.0, "step": 20 }, { "epoch": 20.0, "eval_accumulated_loss": 2130.648073196411, "eval_accuracy": 0.6473684210526316, "eval_accuracy_32": 0.6455696202531646, "eval_accuracy_33": 0.75, "eval_accuracy_34": 0.6830985915492958, "eval_accuracy_35": 0.4745762711864407, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 321.0, "eval_correct_gen_preds_32": 77.0, "eval_correct_gen_preds_33": 109.0, "eval_correct_gen_preds_34": 90.0, "eval_correct_gen_preds_35": 45.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 369.0, "eval_correct_preds_32": 102.0, "eval_correct_preds_33": 114.0, "eval_correct_preds_34": 97.0, "eval_correct_preds_35": 56.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5631578947368421, "eval_gen_accuracy_32": 0.4873417721518987, "eval_gen_accuracy_33": 0.7171052631578947, "eval_gen_accuracy_34": 0.6338028169014085, "eval_gen_accuracy_35": 0.3813559322033898, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.7379796504974365, "eval_mdl": 3073.8754090800876, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.5005, "eval_samples_per_second": 227.954, "eval_steps_per_second": 2.4, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 20 }, { "epoch": 21.0, "grad_norm": 0.0003643035888671875, "learning_rate": 1.8236765814298328e-05, "loss": 0.0, "step": 21 }, { "epoch": 21.0, "eval_accumulated_loss": 2130.965400695801, "eval_accuracy": 0.6508771929824562, "eval_accuracy_32": 0.6518987341772152, "eval_accuracy_33": 0.743421052631579, "eval_accuracy_34": 0.6971830985915493, "eval_accuracy_35": 0.4745762711864407, "eval_accuracy_36": 0.0, "eval_correct_gen_preds": 326.0, "eval_correct_gen_preds_32": 79.0, "eval_correct_gen_preds_33": 110.0, "eval_correct_gen_preds_34": 91.0, "eval_correct_gen_preds_35": 46.0, "eval_correct_gen_preds_36": 0.0, "eval_correct_preds": 371.0, "eval_correct_preds_32": 103.0, "eval_correct_preds_33": 113.0, "eval_correct_preds_34": 99.0, "eval_correct_preds_35": 56.0, "eval_correct_preds_36": 0.0, "eval_gen_accuracy": 0.5719298245614035, "eval_gen_accuracy_32": 0.5, "eval_gen_accuracy_33": 0.7236842105263158, "eval_gen_accuracy_34": 0.6408450704225352, "eval_gen_accuracy_35": 0.3898305084745763, "eval_gen_accuracy_36": 0.0, "eval_loss": 3.7385361194610596, "eval_mdl": 3074.3332158897947, "eval_model_preparation_time": 0.0069, "eval_runtime": 2.367, "eval_samples_per_second": 240.814, "eval_steps_per_second": 2.535, "eval_total_labels_32": 158.0, "eval_total_labels_33": 152.0, "eval_total_labels_34": 142.0, "eval_total_labels_35": 118.0, "eval_total_labels_36": 0.0, "eval_total_preds": 570.0, "step": 21 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 30, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 147139692134400.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }