{ "iteration": 200000, "best_val_loss": 5.296863555908203, "early_stopping_counter": 17, "learning_rate": 6e-06, "batch_size": 12, "block_size": 1024, "model_args": { "n_layer": 12, "n_head": 12, "n_embd": 768, "block_size": 1024, "bias": false, "vocab_size": 4096, "dropout": 0.0 } }