{ "base_model": "kmseong/Llama-3.1-8B-base-SSFT_lr5e-5", "fine_tuning_type": "Full Parameter Fine-tuning", "dataset": "GSM8K", "num_train_samples": 7473, "batch_size": 4, "grad_accum": 4, "learning_rate": 1e-05, "weight_decay": 0.01, "warmup_ratio": 0.1, "epochs": 3, "max_length": 1024, "max_grad_norm": 1.0, "lr_scheduler_type": "cosine", "optimizer": "AdamW (torch)", "gradient_checkpointing": false, "dtype": "bf16", "trainer_type": "Trainer", "safety_mix_ratio": 0.0, "safety_data_path": null }