{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 378, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07936507936507936, "grad_norm": 18.09902303910361, "learning_rate": 4.210526315789474e-06, "loss": 1.0034, "loss_nan_ranks": 0, "loss_rank_avg": 0.996525764465332, "step": 5, "valid_targets_mean": 1317.2, "valid_targets_min": 650 }, { "epoch": 0.15873015873015872, "grad_norm": 8.16991336123599, "learning_rate": 9.473684210526315e-06, "loss": 0.8848, "loss_nan_ranks": 0, "loss_rank_avg": 0.7810754776000977, "step": 10, "valid_targets_mean": 1280.9, "valid_targets_min": 714 }, { "epoch": 0.23809523809523808, "grad_norm": 3.1633278706676378, "learning_rate": 1.4736842105263159e-05, "loss": 0.6728, "loss_nan_ranks": 0, "loss_rank_avg": 0.6020610332489014, "step": 15, "valid_targets_mean": 1262.5, "valid_targets_min": 651 }, { "epoch": 0.31746031746031744, "grad_norm": 2.144961671140719, "learning_rate": 2e-05, "loss": 0.5289, "loss_nan_ranks": 0, "loss_rank_avg": 0.5145202875137329, "step": 20, "valid_targets_mean": 1289.9, "valid_targets_min": 794 }, { "epoch": 0.3968253968253968, "grad_norm": 1.5113256761550613, "learning_rate": 2.526315789473684e-05, "loss": 0.4561, "loss_nan_ranks": 0, "loss_rank_avg": 0.43933457136154175, "step": 25, "valid_targets_mean": 1349.3, "valid_targets_min": 683 }, { "epoch": 0.47619047619047616, "grad_norm": 1.4341124853993426, "learning_rate": 3.052631578947369e-05, "loss": 0.3862, "loss_nan_ranks": 0, "loss_rank_avg": 0.36623549461364746, "step": 30, "valid_targets_mean": 1292.4, "valid_targets_min": 730 }, { "epoch": 0.5555555555555556, "grad_norm": 1.1254468593395395, "learning_rate": 3.578947368421053e-05, "loss": 0.352, "loss_nan_ranks": 0, "loss_rank_avg": 0.32470637559890747, "step": 35, "valid_targets_mean": 1341.6, "valid_targets_min": 702 }, { "epoch": 0.6349206349206349, "grad_norm": 1.1077925223265233, "learning_rate": 3.999914623406736e-05, "loss": 0.3518, "loss_nan_ranks": 0, "loss_rank_avg": 0.3396565914154053, "step": 40, "valid_targets_mean": 1224.6, "valid_targets_min": 475 }, { "epoch": 0.7142857142857143, "grad_norm": 1.0453242597684407, "learning_rate": 3.9969272079348685e-05, "loss": 0.3189, "loss_nan_ranks": 0, "loss_rank_avg": 0.3043442964553833, "step": 45, "valid_targets_mean": 1244.7, "valid_targets_min": 717 }, { "epoch": 0.7936507936507936, "grad_norm": 0.9888625604591795, "learning_rate": 3.989678249165612e-05, "loss": 0.3063, "loss_nan_ranks": 0, "loss_rank_avg": 0.29527759552001953, "step": 50, "valid_targets_mean": 1431.3, "valid_targets_min": 761 }, { "epoch": 0.873015873015873, "grad_norm": 0.9865480113822774, "learning_rate": 3.9781832167422926e-05, "loss": 0.2988, "loss_nan_ranks": 0, "loss_rank_avg": 0.3123767673969269, "step": 55, "valid_targets_mean": 1331.8, "valid_targets_min": 706 }, { "epoch": 0.9523809523809523, "grad_norm": 0.9489794948782682, "learning_rate": 3.962466641643398e-05, "loss": 0.2983, "loss_nan_ranks": 0, "loss_rank_avg": 0.31309300661087036, "step": 60, "valid_targets_mean": 1493.4, "valid_targets_min": 832 }, { "epoch": 1.0317460317460316, "grad_norm": 1.1071899523713842, "learning_rate": 3.942562063832228e-05, "loss": 0.2824, "loss_nan_ranks": 0, "loss_rank_avg": 0.2574073374271393, "step": 65, "valid_targets_mean": 1164.1, "valid_targets_min": 650 }, { "epoch": 1.1111111111111112, "grad_norm": 1.1419435529606288, "learning_rate": 3.9185119606809305e-05, "loss": 0.2641, "loss_nan_ranks": 0, "loss_rank_avg": 0.24323183298110962, "step": 70, "valid_targets_mean": 1309.0, "valid_targets_min": 774 }, { "epoch": 1.1904761904761905, "grad_norm": 0.8396527150729525, "learning_rate": 3.89036765632164e-05, "loss": 0.2622, "loss_nan_ranks": 0, "loss_rank_avg": 0.24688035249710083, "step": 75, "valid_targets_mean": 1233.3, "valid_targets_min": 648 }, { "epoch": 1.2698412698412698, "grad_norm": 1.031066680643171, "learning_rate": 3.8581892121181984e-05, "loss": 0.2588, "loss_nan_ranks": 0, "loss_rank_avg": 0.2587030231952667, "step": 80, "valid_targets_mean": 1194.1, "valid_targets_min": 683 }, { "epoch": 1.3492063492063493, "grad_norm": 1.0087562996042767, "learning_rate": 3.822045298492177e-05, "loss": 0.2604, "loss_nan_ranks": 0, "loss_rank_avg": 0.26486936211586, "step": 85, "valid_targets_mean": 1248.9, "valid_targets_min": 620 }, { "epoch": 1.4285714285714286, "grad_norm": 0.8956917783711329, "learning_rate": 3.782013048376736e-05, "loss": 0.2495, "loss_nan_ranks": 0, "loss_rank_avg": 0.26159846782684326, "step": 90, "valid_targets_mean": 1584.2, "valid_targets_min": 801 }, { "epoch": 1.507936507936508, "grad_norm": 1.0889510509050193, "learning_rate": 3.738177892611057e-05, "loss": 0.2597, "loss_nan_ranks": 0, "loss_rank_avg": 0.263064444065094, "step": 95, "valid_targets_mean": 1269.8, "valid_targets_min": 622 }, { "epoch": 1.5873015873015874, "grad_norm": 0.9208538453912383, "learning_rate": 3.690633377626628e-05, "loss": 0.2569, "loss_nan_ranks": 0, "loss_rank_avg": 0.2552173435688019, "step": 100, "valid_targets_mean": 1260.8, "valid_targets_min": 644 }, { "epoch": 1.6666666666666665, "grad_norm": 1.0431803629591703, "learning_rate": 3.639480965814443e-05, "loss": 0.2439, "loss_nan_ranks": 0, "loss_rank_avg": 0.2334054708480835, "step": 105, "valid_targets_mean": 1119.9, "valid_targets_min": 494 }, { "epoch": 1.746031746031746, "grad_norm": 1.052527932597642, "learning_rate": 3.584829818999148e-05, "loss": 0.2439, "loss_nan_ranks": 0, "loss_rank_avg": 0.26042911410331726, "step": 110, "valid_targets_mean": 1358.5, "valid_targets_min": 746 }, { "epoch": 1.8253968253968254, "grad_norm": 1.2050528658411404, "learning_rate": 3.526796565482206e-05, "loss": 0.2445, "loss_nan_ranks": 0, "loss_rank_avg": 0.24487106502056122, "step": 115, "valid_targets_mean": 1421.6, "valid_targets_min": 729 }, { "epoch": 1.9047619047619047, "grad_norm": 0.9072467275412588, "learning_rate": 3.4655050511512236e-05, "loss": 0.2426, "loss_nan_ranks": 0, "loss_rank_avg": 0.25254106521606445, "step": 120, "valid_targets_mean": 1449.0, "valid_targets_min": 837 }, { "epoch": 1.9841269841269842, "grad_norm": 1.0303768935216235, "learning_rate": 3.401086075186582e-05, "loss": 0.248, "loss_nan_ranks": 0, "loss_rank_avg": 0.24224883317947388, "step": 125, "valid_targets_mean": 1346.0, "valid_targets_min": 640 }, { "epoch": 2.0634920634920633, "grad_norm": 1.0599334863834182, "learning_rate": 3.333677110929403e-05, "loss": 0.2222, "loss_nan_ranks": 0, "loss_rank_avg": 0.22304898500442505, "step": 130, "valid_targets_mean": 1223.6, "valid_targets_min": 686 }, { "epoch": 2.142857142857143, "grad_norm": 0.9942911032267476, "learning_rate": 3.263422012506502e-05, "loss": 0.2136, "loss_nan_ranks": 0, "loss_rank_avg": 0.23300984501838684, "step": 135, "valid_targets_mean": 1373.8, "valid_targets_min": 714 }, { "epoch": 2.2222222222222223, "grad_norm": 0.9867493029938139, "learning_rate": 3.190470707838438e-05, "loss": 0.211, "loss_nan_ranks": 0, "loss_rank_avg": 0.1962147355079651, "step": 140, "valid_targets_mean": 1268.1, "valid_targets_min": 618 }, { "epoch": 2.3015873015873014, "grad_norm": 0.9683968495450838, "learning_rate": 3.114978878685771e-05, "loss": 0.215, "loss_nan_ranks": 0, "loss_rank_avg": 0.22331558167934418, "step": 145, "valid_targets_mean": 1451.9, "valid_targets_min": 718 }, { "epoch": 2.380952380952381, "grad_norm": 3.9084274034442066, "learning_rate": 3.0371076284163442e-05, "loss": 0.2155, "loss_nan_ranks": 0, "loss_rank_avg": 0.22763928771018982, "step": 150, "valid_targets_mean": 1204.6, "valid_targets_min": 574 }, { "epoch": 2.4603174603174605, "grad_norm": 1.0158116125994063, "learning_rate": 2.9570231382025732e-05, "loss": 0.2038, "loss_nan_ranks": 0, "loss_rank_avg": 0.18950828909873962, "step": 155, "valid_targets_mean": 1317.1, "valid_targets_min": 904 }, { "epoch": 2.5396825396825395, "grad_norm": 0.896681347419589, "learning_rate": 2.8748963123824532e-05, "loss": 0.218, "loss_nan_ranks": 0, "loss_rank_avg": 0.22112029790878296, "step": 160, "valid_targets_mean": 1424.4, "valid_targets_min": 661 }, { "epoch": 2.619047619047619, "grad_norm": 1.0606529423953202, "learning_rate": 2.790902413741085e-05, "loss": 0.2133, "loss_nan_ranks": 0, "loss_rank_avg": 0.2376096546649933, "step": 165, "valid_targets_mean": 1399.1, "valid_targets_min": 740 }, { "epoch": 2.6984126984126986, "grad_norm": 0.9396280814023571, "learning_rate": 2.7052206894910653e-05, "loss": 0.2056, "loss_nan_ranks": 0, "loss_rank_avg": 0.2136600762605667, "step": 170, "valid_targets_mean": 1357.7, "valid_targets_min": 555 }, { "epoch": 2.7777777777777777, "grad_norm": 1.083930671175511, "learning_rate": 2.618033988749895e-05, "loss": 0.2032, "loss_nan_ranks": 0, "loss_rank_avg": 0.19602251052856445, "step": 175, "valid_targets_mean": 1215.0, "valid_targets_min": 615 }, { "epoch": 2.857142857142857, "grad_norm": 0.979793298540035, "learning_rate": 2.5295283723307517e-05, "loss": 0.2162, "loss_nan_ranks": 0, "loss_rank_avg": 0.2314784824848175, "step": 180, "valid_targets_mean": 1266.5, "valid_targets_min": 718 }, { "epoch": 2.9365079365079367, "grad_norm": 1.013880387387088, "learning_rate": 2.4398927156793376e-05, "loss": 0.2041, "loss_nan_ranks": 0, "loss_rank_avg": 0.20053911209106445, "step": 185, "valid_targets_mean": 1415.3, "valid_targets_min": 823 }, { "epoch": 3.015873015873016, "grad_norm": 0.9506724254980058, "learning_rate": 2.3493183058041578e-05, "loss": 0.2014, "loss_nan_ranks": 0, "loss_rank_avg": 0.18665489554405212, "step": 190, "valid_targets_mean": 1395.1, "valid_targets_min": 620 }, { "epoch": 3.0952380952380953, "grad_norm": 1.2045650788528535, "learning_rate": 2.257998433060407e-05, "loss": 0.1842, "loss_nan_ranks": 0, "loss_rank_avg": 0.1960422694683075, "step": 195, "valid_targets_mean": 1277.2, "valid_targets_min": 748 }, { "epoch": 3.1746031746031744, "grad_norm": 0.9904415244786193, "learning_rate": 2.166127978658608e-05, "loss": 0.185, "loss_nan_ranks": 0, "loss_rank_avg": 0.18444815278053284, "step": 200, "valid_targets_mean": 1417.6, "valid_targets_min": 805 }, { "epoch": 3.253968253968254, "grad_norm": 0.9573080350285159, "learning_rate": 2.0739029987782903e-05, "loss": 0.186, "loss_nan_ranks": 0, "loss_rank_avg": 0.18823915719985962, "step": 205, "valid_targets_mean": 1211.6, "valid_targets_min": 574 }, { "epoch": 3.3333333333333335, "grad_norm": 0.8982800713768102, "learning_rate": 1.9815203061742188e-05, "loss": 0.1815, "loss_nan_ranks": 0, "loss_rank_avg": 0.1576843112707138, "step": 210, "valid_targets_mean": 1294.8, "valid_targets_min": 813 }, { "epoch": 3.4126984126984126, "grad_norm": 0.9855158183383274, "learning_rate": 1.8891770501680602e-05, "loss": 0.1815, "loss_nan_ranks": 0, "loss_rank_avg": 0.18304908275604248, "step": 215, "valid_targets_mean": 1243.9, "valid_targets_min": 774 }, { "epoch": 3.492063492063492, "grad_norm": 1.0040666267759992, "learning_rate": 1.7970702959217944e-05, "loss": 0.1768, "loss_nan_ranks": 0, "loss_rank_avg": 0.1883706897497177, "step": 220, "valid_targets_mean": 1353.8, "valid_targets_min": 640 }, { "epoch": 3.571428571428571, "grad_norm": 1.4558785666136118, "learning_rate": 1.705396603890725e-05, "loss": 0.1801, "loss_nan_ranks": 0, "loss_rank_avg": 0.20024870336055756, "step": 225, "valid_targets_mean": 1336.3, "valid_targets_min": 618 }, { "epoch": 3.6507936507936507, "grad_norm": 1.0249713499978004, "learning_rate": 1.6143516103535666e-05, "loss": 0.1845, "loss_nan_ranks": 0, "loss_rank_avg": 0.17252278327941895, "step": 230, "valid_targets_mean": 1262.4, "valid_targets_min": 680 }, { "epoch": 3.7301587301587302, "grad_norm": 0.9876581877640742, "learning_rate": 1.524129609914763e-05, "loss": 0.1806, "loss_nan_ranks": 0, "loss_rank_avg": 0.18393632769584656, "step": 235, "valid_targets_mean": 1363.2, "valid_targets_min": 538 }, { "epoch": 3.8095238095238093, "grad_norm": 0.951472253586105, "learning_rate": 1.43492314087001e-05, "loss": 0.1815, "loss_nan_ranks": 0, "loss_rank_avg": 0.19082951545715332, "step": 240, "valid_targets_mean": 1442.2, "valid_targets_min": 935 }, { "epoch": 3.888888888888889, "grad_norm": 1.1055947291031696, "learning_rate": 1.3469225743198337e-05, "loss": 0.1893, "loss_nan_ranks": 0, "loss_rank_avg": 0.18884754180908203, "step": 245, "valid_targets_mean": 1243.9, "valid_targets_min": 721 }, { "epoch": 3.9682539682539684, "grad_norm": 0.9454028728577349, "learning_rate": 1.260315707908062e-05, "loss": 0.1806, "loss_nan_ranks": 0, "loss_rank_avg": 0.19582678377628326, "step": 250, "valid_targets_mean": 1453.9, "valid_targets_min": 729 }, { "epoch": 4.0476190476190474, "grad_norm": 0.9615770906418738, "learning_rate": 1.1752873650521934e-05, "loss": 0.1684, "loss_nan_ranks": 0, "loss_rank_avg": 0.1702508181333542, "step": 255, "valid_targets_mean": 1367.9, "valid_targets_min": 833 }, { "epoch": 4.1269841269841265, "grad_norm": 1.0494388437222053, "learning_rate": 1.0920190005209066e-05, "loss": 0.1671, "loss_nan_ranks": 0, "loss_rank_avg": 0.1574375033378601, "step": 260, "valid_targets_mean": 1268.7, "valid_targets_min": 748 }, { "epoch": 4.2063492063492065, "grad_norm": 1.058177748509928, "learning_rate": 1.0106883132004428e-05, "loss": 0.1615, "loss_nan_ranks": 0, "loss_rank_avg": 0.15336745977401733, "step": 265, "valid_targets_mean": 1281.3, "valid_targets_min": 679 }, { "epoch": 4.285714285714286, "grad_norm": 1.1128115262277665, "learning_rate": 9.314688668762232e-06, "loss": 0.1555, "loss_nan_ranks": 0, "loss_rank_avg": 0.15343694388866425, "step": 270, "valid_targets_mean": 1171.1, "valid_targets_min": 683 }, { "epoch": 4.365079365079365, "grad_norm": 0.9068931883638662, "learning_rate": 8.545297198389896e-06, "loss": 0.1524, "loss_nan_ranks": 0, "loss_rank_avg": 0.1522439420223236, "step": 275, "valid_targets_mean": 1484.4, "valid_targets_min": 692 }, { "epoch": 4.444444444444445, "grad_norm": 0.987267277849222, "learning_rate": 7.800350641058867e-06, "loss": 0.1741, "loss_nan_ranks": 0, "loss_rank_avg": 0.17415179312229156, "step": 280, "valid_targets_mean": 1327.5, "valid_targets_min": 834 }, { "epoch": 4.523809523809524, "grad_norm": 0.9406767373189786, "learning_rate": 7.081438750264258e-06, "loss": 0.163, "loss_nan_ranks": 0, "loss_rank_avg": 0.1646547019481659, "step": 285, "valid_targets_mean": 1483.0, "valid_targets_min": 718 }, { "epoch": 4.603174603174603, "grad_norm": 1.1253357217876951, "learning_rate": 6.3900957202107695e-06, "loss": 0.1564, "loss_nan_ranks": 0, "loss_rank_avg": 0.1593586802482605, "step": 290, "valid_targets_mean": 1123.4, "valid_targets_min": 702 }, { "epoch": 4.682539682539683, "grad_norm": 1.0811536769207744, "learning_rate": 5.727796911764955e-06, "loss": 0.1698, "loss_nan_ranks": 0, "loss_rank_avg": 0.17252852022647858, "step": 295, "valid_targets_mean": 1290.1, "valid_targets_min": 622 }, { "epoch": 4.761904761904762, "grad_norm": 1.0374368953244144, "learning_rate": 5.095955703960746e-06, "loss": 0.1625, "loss_nan_ranks": 0, "loss_rank_avg": 0.17260053753852844, "step": 300, "valid_targets_mean": 1328.1, "valid_targets_min": 643 }, { "epoch": 4.841269841269841, "grad_norm": 1.034565590736608, "learning_rate": 4.495920477777403e-06, "loss": 0.1551, "loss_nan_ranks": 0, "loss_rank_avg": 0.14985054731369019, "step": 305, "valid_targets_mean": 1234.3, "valid_targets_min": 615 }, { "epoch": 4.920634920634921, "grad_norm": 1.0765022207707986, "learning_rate": 3.9289717386265255e-06, "loss": 0.1592, "loss_nan_ranks": 0, "loss_rank_avg": 0.1612224131822586, "step": 310, "valid_targets_mean": 1193.8, "valid_targets_min": 740 }, { "epoch": 5.0, "grad_norm": 0.9478245585431166, "learning_rate": 3.3963193836889907e-06, "loss": 0.157, "loss_nan_ranks": 0, "loss_rank_avg": 0.1592485010623932, "step": 315, "valid_targets_mean": 1360.1, "valid_targets_min": 706 }, { "epoch": 5.079365079365079, "grad_norm": 1.0328174266538293, "learning_rate": 2.89910011993338e-06, "loss": 0.151, "loss_nan_ranks": 0, "loss_rank_avg": 0.14890140295028687, "step": 320, "valid_targets_mean": 1574.1, "valid_targets_min": 718 }, { "epoch": 5.158730158730159, "grad_norm": 0.9579410192765627, "learning_rate": 2.4383750383260417e-06, "loss": 0.1526, "loss_nan_ranks": 0, "loss_rank_avg": 0.14151515066623688, "step": 325, "valid_targets_mean": 1227.1, "valid_targets_min": 680 }, { "epoch": 5.238095238095238, "grad_norm": 1.0498590988698493, "learning_rate": 2.015127349409489e-06, "loss": 0.1492, "loss_nan_ranks": 0, "loss_rank_avg": 0.15444359183311462, "step": 330, "valid_targets_mean": 1198.4, "valid_targets_min": 620 }, { "epoch": 5.317460317460317, "grad_norm": 1.090019046173157, "learning_rate": 1.6302602850815397e-06, "loss": 0.1475, "loss_nan_ranks": 0, "loss_rank_avg": 0.15183570981025696, "step": 335, "valid_targets_mean": 1241.6, "valid_targets_min": 692 }, { "epoch": 5.396825396825397, "grad_norm": 1.0209735271764744, "learning_rate": 1.2845951710529513e-06, "loss": 0.1564, "loss_nan_ranks": 0, "loss_rank_avg": 0.16618052124977112, "step": 340, "valid_targets_mean": 1384.1, "valid_targets_min": 826 }, { "epoch": 5.476190476190476, "grad_norm": 1.167276848177137, "learning_rate": 9.788696740969295e-07, "loss": 0.1449, "loss_nan_ranks": 0, "loss_rank_avg": 0.15419039130210876, "step": 345, "valid_targets_mean": 1056.2, "valid_targets_min": 622 }, { "epoch": 5.555555555555555, "grad_norm": 1.0131829348691888, "learning_rate": 7.137362278311033e-07, "loss": 0.1472, "loss_nan_ranks": 0, "loss_rank_avg": 0.13364669680595398, "step": 350, "valid_targets_mean": 1154.5, "valid_targets_min": 644 }, { "epoch": 5.634920634920634, "grad_norm": 1.1216486988072, "learning_rate": 4.89760640391268e-07, "loss": 0.1492, "loss_nan_ranks": 0, "loss_rank_avg": 0.1446513682603836, "step": 355, "valid_targets_mean": 1223.9, "valid_targets_min": 615 }, { "epoch": 5.714285714285714, "grad_norm": 1.0546772616513356, "learning_rate": 3.074208869683282e-07, "loss": 0.1573, "loss_nan_ranks": 0, "loss_rank_avg": 0.14680185914039612, "step": 360, "valid_targets_mean": 1361.4, "valid_targets_min": 882 }, { "epoch": 5.7936507936507935, "grad_norm": 1.1759274152221304, "learning_rate": 1.6710608978514509e-07, "loss": 0.1529, "loss_nan_ranks": 0, "loss_rank_avg": 0.16227003931999207, "step": 365, "valid_targets_mean": 1172.2, "valid_targets_min": 636 }, { "epoch": 5.8730158730158735, "grad_norm": 1.064627033500516, "learning_rate": 6.91156876901089e-08, "loss": 0.1514, "loss_nan_ranks": 0, "loss_rank_avg": 0.18192672729492188, "step": 370, "valid_targets_mean": 1303.9, "valid_targets_min": 618 }, { "epoch": 5.9523809523809526, "grad_norm": 1.1282545011521061, "learning_rate": 1.365879713954188e-08, "loss": 0.1454, "loss_nan_ranks": 0, "loss_rank_avg": 0.1329009234905243, "step": 375, "valid_targets_mean": 1124.7, "valid_targets_min": 702 }, { "epoch": 6.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.15850412845611572, "step": 378, "total_flos": 39306874650624.0, "train_loss": 0.23945725027215545, "train_runtime": 1368.0623, "train_samples_per_second": 4.386, "train_steps_per_second": 0.276, "valid_targets_mean": 1612.7, "valid_targets_min": 692 } ], "logging_steps": 5, "max_steps": 378, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 39306874650624.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }